cgma
|
00001 00002 #ifndef CUBIT_HASHER 00003 #define CUBIT_HASHER 00004 00005 #include <stdio.h> 00006 #include <stdlib.h> 00007 #include <vector> 00008 #include <assert.h> 00009 00010 // Hasher class optimized for: 00011 // simple integer keys 00012 // fairly contiguous keys (not memory efficient for non-contiguous keys) 00013 // provides constant time lookups, insertions, and removals 00014 // with emphasis on fast lookups 00015 // T is sub container type, B is number of hashing bits 00016 template <class Y, unsigned int B> 00017 class Hasher 00018 { 00019 public: 00020 00021 struct IterTag : public std::forward_iterator_tag {}; 00022 template <class T> struct BaseIter 00023 { 00024 typedef IterTag iterator_category; 00025 typedef int difference_type; 00026 typedef T value_type; 00027 typedef T* pointer; 00028 typedef T& reference; 00029 }; 00030 00031 class iterator; 00032 typedef iterator const_iterator; 00033 00034 typedef unsigned int Key; 00035 typedef typename Y::value_type value_type; 00036 00037 // number of bits we represent 00038 enum { NumberOfBits = Y::NumberOfBits + B }; 00039 00040 // number of bits our sub containers manage 00041 enum { SubSize = Y::NumberOfBits }; 00042 00043 // number of sub holders we have 00044 enum { MySize = 1 << B }; 00045 00046 // helper mask 00047 enum { MyMask = ( 1 << SubSize ) - 1 }; 00048 00049 value_type Null; 00050 00051 // constructor 00052 Hasher() : Null(0) 00053 { 00054 for(Key i=0; i<MySize; i++) 00055 mChildren[i] = 0; 00056 mCount=0; 00057 } 00058 00059 // destructor 00060 ~Hasher() 00061 { 00062 for(Key i=0; i<MySize; i++) 00063 if(mChildren[i]) 00064 delete mChildren[i]; 00065 } 00066 00067 00068 // add a value for a key 00069 bool add(Key idx, value_type ptr) 00070 { 00071 assert(ptr != 0); 00072 Key myidx = my_index(idx); 00073 Y* y = mChildren[myidx]; 00074 if(!y) 00075 { 00076 y = mChildren[myidx] = new Y(); 00077 } 00078 bool inserted = y->add(sub_index(idx), ptr); 00079 if(inserted) 00080 { 00081 this->mCount++; 00082 } 00083 return inserted; 00084 } 00085 00086 // remove a key 00087 bool remove(Key idx) 00088 { 00089 Key myidx = my_index(idx); 00090 Y* y = mChildren[myidx]; 00091 bool removed = y ? y->remove(sub_index(idx)) : false; 00092 if(removed) 00093 { 00094 this->mCount--; 00095 if(y->count() == 0) 00096 { 00097 delete mChildren[myidx]; 00098 mChildren[myidx] = 0; 00099 } 00100 } 00101 return removed; 00102 } 00103 00104 // lookup a value 00105 const value_type& operator[](Key idx) const 00106 { 00107 Key myidx = my_index(idx); 00108 if(mChildren[myidx]) 00109 return (*mChildren[myidx])[sub_index(idx)]; 00110 return this->Null; 00111 } 00112 00113 value_type& operator[](Key idx) 00114 { 00115 Key myidx = my_index(idx); 00116 if(mChildren[myidx]) 00117 return (*mChildren[myidx])[sub_index(idx)]; 00118 return this->Null; 00119 } 00120 00121 Key count() const 00122 { 00123 return mCount; 00124 } 00125 00126 iterator begin() 00127 { 00128 return iterator(this->mChildren, true); 00129 } 00130 00131 iterator end() 00132 { 00133 return iterator(this->mChildren, false); 00134 } 00135 00136 class iterator : public BaseIter< typename Y::value_type > 00137 { 00138 friend class Hasher; 00139 typedef typename Y::value_type& reference; 00140 enum { MySize = 1 << B }; 00141 public: 00142 00143 iterator() : mChildren(NULL), index(MySize) {} 00144 iterator(const iterator& copy) 00145 { 00146 this->operator=(copy); 00147 } 00148 00149 // prefix incrementer 00150 iterator& operator++() 00151 { 00152 if(index != MySize) 00153 { 00154 ++iter; 00155 typename Y::iterator e = mChildren[index]->end(); 00156 for(; this->iter != e && !*iter; ++iter) {} 00157 if(iter == e) 00158 { 00159 for(++index; index < MySize && !mChildren[index]; ++index) {} 00160 if(index != MySize) 00161 { 00162 e = mChildren[index]->end(); 00163 for(iter = mChildren[index]->begin(); this->iter != e && !*iter; ++iter) {} 00164 } 00165 } 00166 } 00167 return *this; 00168 } 00169 00170 // postfix incrementer 00171 iterator operator++(int) 00172 { 00173 iterator tmp(*this); 00174 this->operator++(); 00175 return tmp; 00176 } 00177 00178 bool operator==(const iterator& other) const 00179 { 00180 if( MySize == this->index && MySize == other.index) 00181 return true; 00182 return this->index == other.index && this->iter == other.iter; 00183 } 00184 00185 bool operator!=(const iterator& other) const 00186 { 00187 return !operator==(other); 00188 } 00189 00190 reference operator*() 00191 { 00192 return *iter; 00193 } 00194 00195 iterator& operator=(const iterator& other) 00196 { 00197 this->mChildren = other.mChildren; 00198 this->index = other.index; 00199 if(other.mChildren) 00200 this->iter = other.iter; 00201 return *this; 00202 } 00203 00204 protected: 00205 iterator(Y** c, bool b) 00206 { 00207 mChildren = c; 00208 index = MySize; 00209 if(b) 00210 { 00211 Y** s = c; 00212 for(; s<c+MySize && !*s; s++){} 00213 if(s<c+MySize && *s) 00214 { 00215 index = s - c; 00216 iter = (*s)->begin(); 00217 for(; 0 == *iter; ++iter) {} 00218 } 00219 } 00220 } 00221 00222 Y** mChildren; 00223 size_t index; 00224 typename Y::iterator iter; 00225 }; 00226 00227 00228 // given key, get index for which subcontainer 00229 Key my_index(Key idx) const 00230 { 00231 return idx >> SubSize; 00232 } 00233 00234 // given key, get index for subcontainer 00235 Key sub_index(Key idx) const 00236 { 00237 return idx & MyMask; 00238 } 00239 00240 private: 00241 00242 Key mCount; 00243 Y* mChildren[MySize]; 00244 }; 00245 00246 00247 // vector class used as the real storage for the hasher 00248 template <class Y, unsigned int S> 00249 class HasherVector : public std::vector<Y> 00250 { 00251 public: 00252 typedef unsigned int Key; 00253 typedef typename std::vector<Y>::value_type value_type; 00254 enum { NumberOfBits = S }; 00255 00256 HasherVector() 00257 : std::vector<Y>(1<<S, (Y)0), mCount(0) 00258 { 00259 } 00260 00261 bool add(Key idx, value_type& ptr) 00262 { 00263 Y& p = (*this)[idx]; 00264 if(!p) 00265 { 00266 p = ptr; 00267 ++this->mCount; 00268 return true; 00269 } 00270 return false; 00271 } 00272 00273 bool remove(Key idx) 00274 { 00275 Y& p = (*this)[idx]; 00276 if(p) 00277 { 00278 p = 0; 00279 --this->mCount; 00280 return true; 00281 } 00282 return false; 00283 } 00284 00285 Key count() const 00286 { 00287 return mCount; 00288 } 00289 00290 private: 00291 Key mCount; 00292 }; 00293 00294 #if 0 00295 int main(int, char**) 00296 { 00297 00298 Hasher <10, HasherVector<int> > myHash; 00299 00300 for(int i = 1; i < 5000000; i++) 00301 { 00302 myHash.add(i,i); 00303 } 00304 00305 //printf("finished...\n"); 00306 } 00307 00308 00309 typedef Hasher<5, Hasher<5, HasherVector<int*> > > MyHash; 00310 MyHash mytree(15); 00311 00312 //mytree.add(1,(int*)(0x01)); 00313 00314 //std::vector<int*> vec; 00315 00316 00317 //HasherVector<int*> myhasher(5, (int*)0x055); 00318 //HasherVector<int*>::iterator jter = myhasher.begin(); 00319 //for(; jter != myhasher.end(); ++jter) 00320 //{ 00321 // int* val = *jter; 00322 //} 00323 00324 for(int i=0; i<10000; i++) 00325 { 00326 //unsigned int key = rand(); 00327 unsigned int key = i; 00328 int* value = (int*)rand(); 00329 bool added = mytree.add(key, value); 00330 if(!added) 00331 printf("wasn't added\n"); 00332 for(int j=0; j<1000; j++) 00333 mytree[key]; 00334 if(value != mytree[key]) 00335 printf("error %i != %i\n", (int)value, (int)mytree[key]); 00336 //mytree.remove(key); 00337 } 00338 00339 MyHash::iterator iter = mytree.begin(); 00340 for(; iter != mytree.end(); ++iter) 00341 { 00342 int* val = *iter; 00343 } 00344 } 00345 #endif 00346 00347 #endif // #ifndef CUBIT_HASHER 00348