cgma
IdHasher.hpp
Go to the documentation of this file.
00001 
00002 #ifndef CUBIT_HASHER
00003 #define CUBIT_HASHER
00004 
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <vector>
00008 #include <assert.h>
00009 
00010 // Hasher class optimized for:
00011 //   simple integer keys
00012 //   fairly contiguous keys (not memory efficient for non-contiguous keys)
00013 // provides constant time lookups, insertions, and removals
00014 //   with emphasis on fast lookups
00015 // T is sub container type, B is number of hashing bits
00016 template <class Y, unsigned int B>
00017 class Hasher
00018 {
00019 public:
00020 
00021   struct IterTag : public std::forward_iterator_tag {};
00022   template <class T> struct BaseIter
00023   {
00024     typedef IterTag iterator_category;
00025     typedef int difference_type;
00026     typedef T value_type;
00027     typedef T* pointer;
00028     typedef T& reference;
00029   };
00030   
00031   class iterator;
00032   typedef iterator const_iterator;
00033 
00034   typedef unsigned int Key;
00035   typedef typename Y::value_type value_type;
00036 
00037   // number of bits we represent
00038   enum { NumberOfBits = Y::NumberOfBits + B };
00039 
00040   // number of bits our sub containers manage
00041   enum { SubSize = Y::NumberOfBits };
00042 
00043   // number of sub holders we have
00044   enum { MySize = 1 << B };
00045 
00046   // helper mask
00047   enum { MyMask = ( 1 << SubSize ) - 1 };
00048 
00049   value_type Null;
00050 
00051   // constructor
00052   Hasher() : Null(0)
00053   {
00054     for(Key i=0; i<MySize; i++)
00055       mChildren[i] = 0;
00056     mCount=0;
00057   }
00058   
00059   // destructor
00060   ~Hasher()
00061   {
00062     for(Key i=0; i<MySize; i++)
00063       if(mChildren[i])
00064         delete mChildren[i];
00065   }
00066 
00067 
00068   // add a value for a key
00069   bool add(Key idx, value_type ptr)
00070   {
00071     assert(ptr != 0);
00072     Key myidx = my_index(idx);
00073     Y* y = mChildren[myidx];
00074     if(!y)
00075     {
00076       y = mChildren[myidx] = new Y();
00077     }
00078     bool inserted = y->add(sub_index(idx), ptr);
00079     if(inserted)
00080     {
00081       this->mCount++;
00082     }
00083     return inserted;
00084   }
00085 
00086   // remove a key
00087   bool remove(Key idx)
00088   {
00089     Key myidx = my_index(idx);
00090     Y* y = mChildren[myidx];
00091     bool removed = y ? y->remove(sub_index(idx)) : false;
00092     if(removed)
00093     {
00094       this->mCount--;
00095       if(y->count() == 0)
00096       {
00097         delete mChildren[myidx];
00098         mChildren[myidx] = 0;
00099       }
00100     }
00101     return removed;
00102   }
00103 
00104   // lookup a value
00105   const value_type& operator[](Key idx) const
00106   {
00107     Key myidx = my_index(idx);
00108     if(mChildren[myidx])
00109       return (*mChildren[myidx])[sub_index(idx)];
00110     return this->Null;
00111   }
00112   
00113   value_type& operator[](Key idx)
00114   {
00115     Key myidx = my_index(idx);
00116     if(mChildren[myidx])
00117       return (*mChildren[myidx])[sub_index(idx)];
00118     return this->Null;
00119   }
00120   
00121   Key count() const
00122   {
00123     return mCount;
00124   }
00125 
00126   iterator begin()
00127   {
00128     return iterator(this->mChildren, true);
00129   }
00130   
00131   iterator end()
00132   {
00133     return iterator(this->mChildren, false);
00134   }
00135 
00136   class iterator : public BaseIter< typename Y::value_type >
00137   {
00138       friend class Hasher;
00139       typedef typename Y::value_type& reference;
00140       enum { MySize = 1 << B };
00141     public:
00142 
00143       iterator() : mChildren(NULL), index(MySize) {}
00144       iterator(const iterator& copy)
00145       {
00146         this->operator=(copy);
00147       }
00148 
00149       // prefix incrementer
00150       iterator& operator++()
00151       {
00152         if(index != MySize)
00153         {
00154           ++iter;
00155           typename Y::iterator e = mChildren[index]->end();
00156           for(; this->iter != e && !*iter; ++iter) {}
00157           if(iter == e)
00158           {
00159             for(++index; index < MySize && !mChildren[index]; ++index) {}
00160             if(index != MySize)
00161             {
00162               e = mChildren[index]->end();
00163               for(iter = mChildren[index]->begin(); this->iter != e && !*iter; ++iter) {}
00164             }
00165           }
00166         }
00167         return *this;
00168       }
00169 
00170       // postfix incrementer
00171       iterator operator++(int)
00172       {
00173         iterator tmp(*this);
00174         this->operator++();
00175         return tmp;
00176       }
00177       
00178       bool operator==(const iterator& other) const
00179       {
00180         if( MySize == this->index && MySize == other.index)
00181           return true;
00182         return this->index == other.index && this->iter == other.iter;
00183       }
00184 
00185       bool operator!=(const iterator& other) const
00186       {
00187         return !operator==(other);
00188       }
00189 
00190       reference operator*()
00191       {
00192         return *iter;
00193       }
00194 
00195       iterator& operator=(const iterator& other)
00196       {
00197         this->mChildren = other.mChildren;
00198         this->index = other.index;
00199         if(other.mChildren)
00200           this->iter = other.iter;
00201         return *this;
00202       }
00203       
00204     protected:
00205       iterator(Y** c, bool b)
00206       {
00207         mChildren = c;
00208         index = MySize;
00209         if(b)
00210         {
00211           Y** s = c;
00212           for(; s<c+MySize && !*s; s++){}
00213           if(s<c+MySize && *s)
00214           {
00215             index = s - c;
00216             iter = (*s)->begin();
00217             for(; 0 == *iter; ++iter) {}
00218           }
00219         }
00220       }
00221 
00222       Y** mChildren;
00223       size_t index;
00224       typename Y::iterator iter;
00225   };
00226 
00227 
00228   // given key, get index for which subcontainer
00229   Key my_index(Key idx) const
00230   {
00231     return idx >> SubSize;
00232   }
00233 
00234   // given key, get index for subcontainer
00235   Key sub_index(Key idx) const
00236   {
00237     return idx & MyMask;
00238   }
00239 
00240 private:
00241 
00242   Key mCount;
00243   Y* mChildren[MySize];
00244 };
00245 
00246 
00247 // vector class used as the real storage for the hasher
00248 template <class Y, unsigned int S>
00249 class HasherVector : public std::vector<Y>
00250 {
00251   public:
00252     typedef unsigned int Key;
00253     typedef typename std::vector<Y>::value_type value_type;
00254     enum { NumberOfBits = S };
00255 
00256     HasherVector() 
00257         : std::vector<Y>(1<<S, (Y)0), mCount(0) 
00258       {
00259       }
00260 
00261     bool add(Key idx, value_type& ptr)
00262     {
00263       Y& p = (*this)[idx];
00264       if(!p)
00265       {
00266         p = ptr;
00267         ++this->mCount;
00268         return true;
00269       }
00270       return false;
00271     }
00272 
00273     bool remove(Key idx)
00274     {
00275       Y& p = (*this)[idx];
00276       if(p)
00277       {
00278         p = 0;
00279         --this->mCount;
00280         return true;
00281       }
00282       return false;
00283     }
00284 
00285     Key count() const
00286     {
00287       return mCount;
00288     }
00289 
00290   private:
00291     Key mCount;
00292 };
00293 
00294 #if 0
00295 int main(int, char**)
00296 {
00297 
00298   Hasher <10, HasherVector<int> > myHash;
00299 
00300   for(int i = 1; i < 5000000; i++)
00301   {
00302     myHash.add(i,i);
00303   }
00304 
00305   //printf("finished...\n");
00306 }
00307 
00308 
00309   typedef Hasher<5, Hasher<5, HasherVector<int*> > > MyHash;
00310   MyHash mytree(15);
00311 
00312   //mytree.add(1,(int*)(0x01));
00313   
00314   //std::vector<int*> vec;
00315 
00316 
00317   //HasherVector<int*> myhasher(5, (int*)0x055);
00318   //HasherVector<int*>::iterator jter = myhasher.begin();
00319   //for(; jter != myhasher.end(); ++jter)
00320   //{
00321   //  int* val = *jter;
00322   //}
00323 
00324   for(int i=0; i<10000; i++)
00325   {
00326     //unsigned int key = rand();
00327      unsigned int key = i;
00328     int* value = (int*)rand();
00329     bool added = mytree.add(key, value);
00330     if(!added)
00331       printf("wasn't added\n");
00332     for(int j=0; j<1000; j++)
00333       mytree[key];
00334     if(value != mytree[key])
00335       printf("error %i != %i\n", (int)value, (int)mytree[key]);
00336     //mytree.remove(key);
00337   }
00338   
00339   MyHash::iterator iter = mytree.begin();
00340   for(; iter != mytree.end(); ++iter)
00341   {
00342     int* val = *iter;
00343   }
00344 }
00345 #endif
00346 
00347 #endif // #ifndef CUBIT_HASHER
00348 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines