casacore
HashMap.h
Go to the documentation of this file.
1 //# HashMap.h: this defines HashMap, which is a hashed associative array
2 //# Copyright (C) 1995,1996,1999,2000,2001
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef CASA_HASHMAP_H
29 #define CASA_HASHMAP_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
33 #include <casacore/casa/Containers/Block.h>
34 #include <casacore/casa/Containers/List.h>
35 #include <casacore/casa/Containers/OrderedPair.h>
36 #include <casacore/casa/Exceptions/Error.h>
37 
38 namespace casacore { //# NAMESPACE CASACORE - BEGIN
39 
40 //# Forward Declarations
41 template<class key,class val> class ConstHashMapIter;
43 extern void throw_hashmapiter_init_error();
44 
45 // <summary>
46 // Hash functions for standard types
47 // </summary>
48 //
49 // <synopsis>
50 // These are the declarations for the standard hash functions
51 // used by <linkto class=HashMap>HashMap</linkto>. In general, a function
52 // such as these is defined for each type that is to be used as
53 // a key in <linkto class=HashMap>HashMap</linkto>.
54 //
55 // These hash functions map the key type to any integer. This
56 // integer is then used by <linkto class=HashMap>HashMap</linkto> to
57 // select a bucket in the hash table.
58 // </synopsis>
59 //
60 // <group name=hashfunc>
61 uInt hashFunc(const String &);
62 uInt hashFunc(const float &);
63 uInt hashFunc(const double &);
64 uInt hashFunc(const int &);
65 uInt hashFunc(const unsigned &);
66 //</group>
67 
68 
69 // <summary>
70 // Specify the default values for HashMap keys
71 // </summary>
72 //
73 // <synopsis>
74 // These are the declarations for a set of functions which provide
75 // the default values for types which are used as keys in
76 // <linkto class=HashMap>HashMap</linkto>.
77 // </synopsis>
78 //
79 // <group name=defaulthashvalue>
80 const Int &defaultHashValue(const Int *);
81 const uInt &defaultHashValue(const uInt *);
82 const Long &defaultHashValue(const Long *);
83 const uLong &defaultHashValue(const uLong *);
84 const Float &defaultHashValue(const Float *);
85 const Double &defaultHashValue(const Double *);
86 const lDouble &defaultHashValue(const lDouble *);
87 // </group>
88 
89 // <summary>
90 // Hash function with state
91 // </summary>
92 // <use visibility=export>
93 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
94 //
95 // <etymology>
96 // This is basically a way of specifying a hash function, but
97 // it is implemented as a class. Thus it is called HashClass,
98 // similar to "hash function".
99 // </etymology>
100 //
101 // <synopsis>
102 // This class is used to specify a hash function. Sometimes a hash
103 // function may require state, it may be useful to create a
104 // hierarchy of hash functions, or it may be useful to create a class
105 // which provides for hashing as well as other functionality. This
106 // class can be used as a base class for any of these purposed. This
107 // class is intended for parameterization of
108 // <linkto class=HashMap>HashMap</linkto>.
109 //
110 // The hash function maps the key type to any integer. This
111 // integer is then used by <linkto class=HashMap>HashMap</linkto> to
112 // select a bucket in the hash table.
113 // </synopsis>
114 //
115 // <example>
116 // If one wished to make a HashClass for integers, something like the
117 // following might be done:
118 // <srcblock>
119 // class IntHash : public HashClass<Int> {
120 // public:
121 // uInt hash(const Int &v) const { return (uInt) v; }
122 // uInt hash(const Int &v) { return (uInt) v; }
123 // HashClass<Int> *clone() const { return new IntHash; }
124 // IntHash() : HashClass<Int>() { }
125 // };
126 // </srcblock>
127 // </example>
128 //
129 // <motivation>
130 // There may be occasions when it is more convenient to use a class
131 // instead of a single function. This base class provides a starting
132 // point plus, and <src>HashMap<k,v></src> has the necessary hooks to
133 // make use of classes derived from this class.
134 // </motivation>
135 //
136 template<class key> class HashClass {
137 public:
138  //
139  // This function maps elements of <src>key</src> type to any integer.
140  // This integer is then used by <linkto class=HashMap>HashMap</linkto> to
141  // select a bucket in the hash table.
142  //
143  virtual uInt hash(const key &) = 0;
144 
145  //
146  // This function is used to make a <em>deep copy</em>. This means that
147  // the copy, which this function returns, contains all derived information.
148  //
149  virtual HashClass<key> *clone() const = 0;
150 
151  HashClass();
152  virtual ~HashClass();
153 };
154 
155 
156 // <summary>
157 // Associative Array with a hash table implementation
158 // </summary>
159 // <use visibility=export>
160 // <reviewed reviewer="" date="yyyy/mm/dd" tests="" demos="">
161 //
162 // <prerequisite>
163 // <li> basic concepts behind hash tables
164 // </prerequisite>
165 //
166 // <etymology>
167 // This is an associative array, also known as a map, and it is implemented
168 // using a hash table, so it is called HashMap.
169 // </etymology>
170 //
171 // <synopsis>
172 // This class is an implementation of an associative array. This is a common
173 // data structure which associates a key of one type with a value of the same
174 // or different type. Essentially it is an (unordered) array which is indexed
175 // by an arbitrary type of index, e.g. strings.
176 //
177 // This class has two template type parameters. The first is the type of the
178 // key and the second is the type of the value. Thus the associative array
179 // is a mapping from the domain, any valid object of the key type, to the
180 // range, any valid object of the value type. This is a <em>complete</em>
181 // map which means that every element in the domain maps to one and only
182 // one element in the range. Those elements which have not been set by the
183 // user of this class map to a default value which is set at construction
184 // time.
185 //
186 // One of the important features of this class which must be understood
187 // is the load factor. This factor indicates the average number of items
188 // in the buckets of the hash table which are currently in use. The goal
189 // is to have the hash function greatly reduce the number of item which
190 // must be searched, i.e. to have a limited number of items in each bucket.
191 // The load factor determines this. Thus a load factor of 1000 or 0 is a
192 // poor choice. The default load factor is 4 which should generally be
193 // fine. The load factor is set with <src>setMaxLoad()</src> and retrieved
194 // with <src>maxLoad()</src>.
195 //
196 // For this class to be used,
197 // three things must be defined:
198 // <ul>
199 // <li> a specialization of the <src>hash()</src> templated function for
200 // the key type or a class derived from <src>HashClass<key></src>.
201 // Either of which can be used to implement the hash function for
202 // a particular type.
203 // <li> an equality operator ( '==' ) for the key
204 // <li> a default constructor or a specialization of
205 // <src>defaultHashValue()</src> for the key type
206 // </ul>
207 //
208 // The implementation of this hash map is derived from work on a proposed
209 // addition to the Standard Template Library by Javier Barreiro, Robert Fraley
210 // and <a href="http://www.cs.rpi.edu/~musser/">David R. Musser</a>. The information
211 // which is available includes:
212 // <ul>
213 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashrationale.ps.Z">
214 // rationale for hash map addition to the STL </a>
215 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashdoc.ps.Z">
216 // hash map addition proposal</a>
217 // <li> <a href="ftp://ftp.cs.rpi.edu/pub/stl/hashimp2.Z">
218 // preliminary implementation</a>
219 // </ul>
220 // each of these sources was utilized in the development of this set of classes,
221 // and in particular, the preliminary implementation was the source for the hashing
222 // algorithm used in this set of classes.
223 // </synopsis>
224 //
225 // <example>
226 // <srcblock>
227 // #include <casacore/casa/Containers/HashMap.h>
228 // #include <casacore/casa/BasicSL/String.h>
229 // #include <casacore/casa/iostream.h>
230 //
231 // main() {
232 // HashMap<String,Int> hash;
233 //
234 // hash("one") = 1; // sets the value of key "one" to "1"
235 // hash.define("two",2); // defines a mapping from key "two" to "2"
236 // hash("three") = 3;
237 // hash.define("four",4);
238 // hash("five") = 5;
239 // hash.define("six",6);
240 //
241 // HashMapIter<String,Int> iter(hash);
242 // for ( iter.toStart(); ! iter.atEnd(); iter++ )
243 // cout << iter.getVal() << ": " << iter.getKey() << endl;
244 //
245 // cout << endl << "Diagnostics" << endl <<
246 // "========================" << endl;
247 // cout << "number defined: " << hash.ndefined() << endl;
248 // cout << "buckets used: " << hash.usedBuckets() << endl;
249 // cout << "buckets available: " << hash.availableBuckets() << endl;
250 // cout << "buckets allocated: " << hash.allocBuckets() << endl;
251 // cout << "loading: " << hash.loading() << endl;
252 // cout << "size (in bytes): " << hash.totalSize() << endl;
253 //
254 // }
255 // </srcblock>
256 // </example>
257 //
258 // <motivation>
259 // There are a couple of reasons why this class was built:
260 // <ul>
261 // <li> use of a hash table can be more efficient
262 // <li> there are a couple of Map classes currently:
263 // <ul>
264 // <li> <linkto class=OrderedMap>OrderedMap</linkto>
265 // <li> <linkto class=SimpleOrderedMap>SimpleOrderedMap</linkto>
266 // </ul>
267 // <src>OrderedMap</src> is derived from a map base class,
268 // <linkto class=Map><src>Map</src></linkto> while
269 // <src>SimpleOrderedMap</src> is not. This collection of classes
270 // has resulted in confusion for the users. It is hoped that this
271 // class can in time replace these other "map" classes by
272 // satisfying the performance demands of
273 // <src>SimpleOrderedMap</src> while still meeting the constraints
274 // of the other map classes.
275 // </ul>
276 // </motivation>
277 //
278 // <templating arg=key>
279 // <li> equality operator (operator==)
280 // <li> function hashFunc() or HashClass derived class provided
281 // <li> default constructor or defaultHashValue() specialization provided or
282 // default value provided at time of construction
283 // </templating>
284 // <templating arg=val>
285 // <li> copy constructor
286 // </templating>
287 //
288 // <thrown>
289 // <li> AipsError
290 // </thrown>
291 //
292 // <todo asof="yyyy/mm/dd">
293 // <li> add this feature
294 // <li> fix this bug
295 // <li> start discussion of this possible extension
296 // </todo>
297 template<class key, class val> class HashMap {
298 friend class ConstHashMapIter<key,val>;
299 private:
300  enum HashMap_Constants { defaultSize_ = 131, defaultMaxLoad_ = 4 };
301 public:
302  static float defaultMaxLoad() { return float(defaultMaxLoad_); }
303  static uInt defaultSize() { return uInt(defaultSize_); }
304 
305  // Signature of the hash functions
306  typedef uInt (*Func)(const key&);
307  //
308  // Copy constructor with copy semantics
309  //
310  HashMap(const HashMap &);
311 
312  //
313  // Default constructor (and variation) which allows for
314  // specifying:
315  // <ul>
316  // <li> a default value, <src>dflt</src>
317  // <li> the initial number of buckets, <src>size</src>
318  // <li> the hash function, <src>newfunc</src>
319  // <li> the maximum load factor, <src>maxlf</src>
320  // </ul>
321  //
322  // This is a pair because the hash function can either be
323  // a simple function or a class derived from
324  // <linkto class=HashClass><src>HashClass</src></linkto>.
325  // <group>
326  HashMap(const val &dflt = defaultHashValue((const val*)(0)),
327  uInt size = uInt(defaultSize_),
328  Func newfunc = hashFunc,
329  float maxlf = float(defaultMaxLoad_))
330  : total_(size),
331  used_(size),
332  filled_(0),
333  defs_(0),
334  maxLoad_(maxlf),
335  blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
336  func(newfunc),
337  hashClass(0),
338  dfltVal(dflt)
339  { }
340 
341  HashMap(const val &dflt, uInt size, const HashClass<key> &newfunc,
342  float maxlf = float(defaultMaxLoad_))
343  : total_(size),
344  used_(size),
345  filled_(0),
346  defs_(0),
347  maxLoad_(maxlf),
348  blk(size, static_cast<List<OrderedPair<key,val> >*>(0)),
349  func(0),
350  hashClass(newfunc.clone()),
351  dfltVal(dflt)
352  { }
353  // </group>
354 
355 
356  //
357  // Constructor which allows for specifying:
358  // <ul>
359  // <li> default value, <src>dflt</src>
360  // <li> hash function, <src>newfunc</src>
361  // <li> maximum load factor, <src>maxlf</src>
362  // </ul>
363  // This is provided because often the user will not be interested
364  // in specifying the initial number of buckets since the number is
365  // increased as needed to maintain the max load factor.
366  //
367  // This is a pair because the hash function can either be
368  // a simple function or a class derived from
369  // <linkto class=HashClass><src>HashClass</src></linkto>.
370  // <group>
371  HashMap(const val &dflt, Func newfunc, float maxlf = float(defaultMaxLoad_))
372  : total_(uInt(defaultSize())), used_(uInt(defaultSize())),
373  filled_(0), defs_(0), maxLoad_(maxlf),
374  blk(uInt(defaultSize()), static_cast<List<OrderedPair<key,val> >*>(0)),
375  func(newfunc), hashClass(0), dfltVal(dflt)
376  { }
377 
378  HashMap(const val &dflt, const HashClass<key> &newfunc,
379  float maxlf = float(defaultMaxLoad_))
380  : total_(defaultSize()), used_(defaultSize()),
381  filled_(0), defs_(0), maxLoad_(maxlf),
382  blk(uInt(defaultSize_), static_cast<List<OrderedPair<key,val> >*>(0)), func(0),
383  hashClass(newfunc.clone()), dfltVal(dflt)
384  { }
385  // </group>
386 
387  //
388  // This copies the right hand side of the assignment. Assignment is done
389  // with <em>copy semantics</em>. This means that all the state is copied.
390  //
392 
393  //
394  // Retrieve values from the map, possibly for later assignment.
395  // It is important to realize that for the <em>non-const</em> version
396  // accessing the key causes an entry to be created in the map if it
397  // didn't already exist. The "value" for this new entry is the
398  // default value. "isDefined()" should be used if this behavior is
399  // not desired.
400  // <group>
401  const val &operator() (const key &ky) const;
402  val &operator() (const key &ky);
403  // </group>
404 
405  //
406  // Define a complete mapping.
407  //
408  val &define(const key &k, const val &v);
409  //
410  // Remove a user defined mapping from <src>k</src> to a value.
411  // After this, the value which <src>k</src> maps to reverts to
412  // the default value.
413  //
414  void remove(const key &k);
415  //
416  // Check to see if a user defined mapping exists for
417  // <src>k</src>. This does <em>not</em> modify the map.
418  //
419  Bool isDefined(const key &k) const;
420  //
421  // Retrieve the default value.
422  // <group>
423  const val &defaultVal() const { return dfltVal; }
424  val &defaultVal() { return dfltVal; }
425  // </group>
426 
427  //
428  // Remove all user defined mapping.
429  //
430  void clear() { freeTable(); }
431  //
432  // Get or set the maximum load factor.
433  //
434  // <group>
435  float maxLoad() const { return maxLoad_; }
436  void setMaxLoad( float new_max ) { maxLoad_ = new_max; }
437  // </group>
438 
439  // Total number of buckets, i.e. the number the
440  // hashing mechanism thinks it has. This is the total
441  // number of buckets used for calculations in the hashing
442  // mechanism. This may be smaller than <src>allocBuckets()</src>
443  // because more underlying storage may be allocated than the
444  // hashing mechanism needs.
445  uInt totalBuckets() const { return total_; }
446  // Number of buckets available, i.e. those which
447  // the hashing mechanism allows itself to use. This
448  // may be smaller than <src>totalBuckets()</src> because
449  // the hashing mechanism can restrict itself to some subset
450  // of the buckets available.
451  uInt availableBuckets() const { return used_; }
452  // Number of buckets currently populated by a bucket list.
453  uInt usedBuckets() const { return filled_; }
454  // Number of buckets which have been allocated, i.e. the
455  // total number which have currently been allocated. This
456  // is the number of buckets created. It may be bigger than
457  // <src>totalBuckets()</src> because more memory can be
458  // allocated than the hashing mechanism needs.
459  uInt allocBuckets() const { return blk.nelements(); }
460 
461  // Number of mappings which have been defined by the user.
462  uInt ndefined() const { return defs_; }
463 
464  // Current hash table loading factor.
465  float loading() const { return ndefined() ? (float) ndefined() / (float) availableBuckets() : 0.0; }
466  // Have any mappings been defined by the user.
467  Bool empty() const { return ndefined() == 0 ? True : False; }
468  // This returns a Block which has the number of elements in each bucket
469  // of the table.
470  Block<uInt> distribution() const;
471  // Total size of this HashMap minus dynamically allocated memory for
472  // key or val.
473  uInt totalSize() const;
474 
475  //
476  // dtor
477  //
478  virtual ~HashMap();
479 
480  enum {HashMapVersion = 1};
481 
482 protected:
483  // Call the hash function.
484  uInt hash(const key &k) const {
485  uInt off = func ? (*func)(k) % totalBuckets() :
486  hashClass ? hashClass->hash(k) % totalBuckets() : 0;
487  return off >= availableBuckets() ? off - (totalBuckets() >> 1) : off;
488  }
489  //
490  // delete the contents of the hash table
491  //
492  void freeTable();
493  //
494  // enlarge the hash table. Returns the bucket which is being
495  // moved...
496  //
497  uInt enlarge();
498  //
499  // populate bucket "to". Returns the bucket which is being
500  // moved...
501  //
502  uInt populate( uInt to );
503 
504 private:
505  // Total Slots
507  // Slots Being Used
509  // Slots with At Least One Value in the Bucket
511  // Number of Defined Mappings
513  // Maximum load
514  float maxLoad_;
516  Func func;
518  val dfltVal;
519 };
520 
521 
522 } //# NAMESPACE CASACORE - END
523 
524 #ifndef CASACORE_NO_AUTO_TEMPLATES
525 #include <casacore/casa/Containers/HashMap.tcc>
526 #endif //# CASACORE_NO_AUTO_TEMPLATES
527 #endif
val & defaultVal()
Definition: HashMap.h:424
uInt totalBuckets() const
Total number of buckets, i.e.
Definition: HashMap.h:445
int Int
Definition: aipstype.h:47
HashMap(const val &dflt, uInt size, const HashClass< key > &newfunc, float maxlf=float(defaultMaxLoad_))
Definition: HashMap.h:341
const val & defaultVal() const
Retrieve the default value.
Definition: HashMap.h:423
Ordered pair class.
Definition: OrderedPair.h:57
uInt usedBuckets() const
Number of buckets currently populated by a bucket list.
Definition: HashMap.h:453
uInt total_
Total Slots.
Definition: HashMap.h:506
uInt availableBuckets() const
Number of buckets available, i.e.
Definition: HashMap.h:451
PtrBlock< List< OrderedPair< key, val > > * > blk
Definition: HashMap.h:515
PtrHolder< T > & operator=(const PtrHolder< T > &other)
Bool empty() const
Have any mappings been defined by the user.
Definition: HashMap.h:467
void throw_hashmapiter_init_error()
HashMap(const val &dflt, Func newfunc, float maxlf=float(defaultMaxLoad_))
Constructor which allows for specifying:
Definition: HashMap.h:371
float maxLoad_
Maximum load.
Definition: HashMap.h:514
static uInt defaultSize()
Definition: HashMap.h:303
uInt ndefined() const
Number of mappings which have been defined by the user.
Definition: HashMap.h:462
Hash function with state.
Definition: HashMap.h:136
virtual HashClass< key > * clone() const =0
This function is used to make a deep copy.
Doubly linked list.
Definition: List.h:52
static float defaultMaxLoad()
Definition: HashMap.h:302
long Long
Definition: aipstype.h:49
HashMap(const val &dflt, const HashClass< key > &newfunc, float maxlf=float(defaultMaxLoad_))
Definition: HashMap.h:378
uInt used_
Slots Being Used.
Definition: HashMap.h:508
double Double
Definition: aipstype.h:52
uInt filled_
Slots with At Least One Value in the Bucket.
Definition: HashMap.h:510
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
uInt hashFunc(const ObjectID &)
HashMap(const val &dflt=defaultHashValue((const val *)(0)), uInt size=uInt(defaultSize_), Func newfunc=hashFunc, float maxlf=float(defaultMaxLoad_))
Default constructor (and variation) which allows for specifying:
Definition: HashMap.h:326
float Float
Definition: aipstype.h:51
uInt defs_
Number of Defined Mappings.
Definition: HashMap.h:512
const Bool False
Definition: aipstype.h:41
A drop-in replacement for Block<T*>.
Definition: Block.h:861
unsigned long uLong
Definition: aipstype.h:50
long double lDouble
Definition: aipstype.h:53
void setMaxLoad(float new_max)
Definition: HashMap.h:436
void throw_invalid_hashmapiter_error()
virtual uInt hash(const key &)=0
This function maps elements of key type to any integer.
HashClass< key > * hashClass
Definition: HashMap.h:517
uInt hash(const key &k) const
Call the hash function.
Definition: HashMap.h:484
float loading() const
Current hash table loading factor.
Definition: HashMap.h:465
String: the storage and methods of handling collections of characters.
Definition: String.h:223
uInt allocBuckets() const
Number of buckets which have been allocated, i.e.
Definition: HashMap.h:459
void clear()
Remove all user defined mapping.
Definition: HashMap.h:430
const Bool True
Definition: aipstype.h:40
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:48
float maxLoad() const
Get or set the maximum load factor.
Definition: HashMap.h:435
Associative Array with a hash table implementation.
Definition: HashMap.h:297