casacore
SSMBase.h
Go to the documentation of this file.
1 //# SSMBase.h: Base class of the Standard Storage Manager
2 //# Copyright (C) 2000,2001,2002
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_SSMBASE_H
29 #define TABLES_SSMBASE_H
30 
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
34 #include <casacore/tables/DataMan/DataManager.h>
35 #include <casacore/casa/Containers/Block.h>
36 
37 namespace casacore { //# NAMESPACE CASACORE - BEGIN
38 
39 //# Forward declarations
40 class BucketCache;
41 class BucketFile;
42 class StManArrayFile;
43 class SSMIndex;
44 class SSMColumn;
45 class SSMStringHandler;
46 
47 // <summary>
48 // Base class of the Standard Storage Manager
49 // </summary>
50 
51 // <use visibility=local>
52 
53 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tStandardStMan.cc">
54 // </reviewed>
55 
56 // <prerequisite>
57 //# Classes you should understand before using this one.
58 // <li> <linkto class=StandardStMan>StandardStMan</linkto>
59 // <li> <linkto class=SSMColumn>SSMColumn</linkto>
60 // </prerequisite>
61 
62 // <etymology>
63 // SSMBase is the base class of the Standard Storage Manager.
64 // </etymology>
65 
66 // <synopsis>
67 // The global principles of this class are described in
68 // <linkto class="StandardStMan:description">StandardStMan</linkto>.
69 // <p>
70 // The Standard Storage Manager divides the data file in equally sized
71 // chunks called buckets. There are 3 types of buckets:
72 // <ul>
73 // <li> Data buckets containing the fixed length data (scalars and
74 // direct arrays of data type Int, Float, Bool, etc.).
75 // For variable shaped data (strings and indirect arrays) they
76 // contain references to the actual data position in the
77 // string buckets or in an external file.
78 // <li> String buckets containing strings and arrays of strings.
79 // <li> Index buckets containing the index info for the data buckets.
80 // </ul>
81 // Bucket access is handled by class
82 // <linkto class=BucketCache>BucketCache</linkto>.
83 // It also keeps a list of free buckets. A bucket is freed when it is
84 // not needed anymore (e.g. all data from it are deleted).
85 // <p>
86 // Data buckets form the main part of the SSM. The data can be viewed as
87 // a few streams of buckets, where each stream contains the data of
88 // a given number of columns. Each stream has an
89 // <linkto class=SSMIndex>SSMIndex</linkto> object describing the
90 // number of rows stored in each data bucket of the stream.
91 // The SSM starts with a single bucket stream (holding all columns),
92 // but when columns are added, new bucket streams might be created.
93 // <p>
94 // For example, we have an SSM with a bucket size of 100 bytes.
95 // There are 5 Int columns (A,B,C,D,E) each taking 4 bytes per row.
96 // Column A, B, C, and D are stored in bucket stream 1, while column
97 // E is stored in bucket stream 2. So in stream 1 each bucket can hold
98 // 6 rows, while in stream 2 each bucket can hold 25 rows.
99 // For a 100 row table it will result in 17+4 data buckets.
100 // <p>
101 // A few classes collaborate to make it work:
102 // <ul>
103 // <li> Each bucket stream has an <linkto class=SSMIndex>SSMIndex</linkto>
104 // object to map row number to bucket number.
105 // Note that in principle each bucket in a stream contains the same
106 // number of rows. However, when a row is deleted it is removed
107 // from its bucket shifting the remainder to the left. Data in the
108 // next buckets is not shifted, so that bucket has now one row less.
109 // <li> For each column SSMBase knows to which bucket stream it belongs
110 // and at which offset the column starts in a bucket.
111 // Note that column data in a bucket are adjacent, which is done
112 // to make it easier to use the
113 // <linkto class=ColumnCache>ColumnCache</linkto> object in SSMColumn
114 // and to be able to efficiently store Bool values as bits.
115 // <li> Each column has an <linkto class=SSMColumn>SSMColumn</linkto>
116 // object knowing how many bits each data cell takes in a bucket.
117 // The SSMColumn objects handle all access to data in the columns
118 // (using SSMBase and SSMIndex).
119 // </ul>
120 // <p>
121 // String buckets are used by class
122 // <linkto class=SSMStringHandler>SSMStringHandler</linkto> to
123 // store scalar strings and fixed and variable shaped arrays of strings.
124 // The bucketnr, offset, and length of such string (arrays) are stored
125 // in the data buckets.
126 // <br>
127 // Indirect arrays of other data types are also stored indirectly
128 // and their offset is stored in the data buckets. Such arrays are
129 // handled by class <linkto class=StIndArray>StIndArray</linkto>
130 // which uses an extra file to store the arrays.
131 // <p>
132 // Index buckets are used by SSMBase to make the SSMIndex data persistent.
133 // It uses alternately 2 sets of index buckets. In that way there is
134 // always an index available in case the system crashes.
135 // If possible 2 halves of a single bucket are used alternately, otherwise
136 // separate buckets are used.
137 // </synopsis>
138 
139 // <motivation>
140 // The public interface of SSMBase is quite large, because the other
141 // internal SSM classes need these functions. To have a class with a
142 // minimal interface for the normal user, class <src>StandardStMan</src>
143 // is derived from it.
144 // <br>StandardStMan needs an isA- instead of hasA-relation to be
145 // able to bind columns to it in class <linkto class=SetupNewTable>
146 // SetupNewTable</linkto>.
147 // </motivation>
148 
149 // <todo asof="$DATE:$">
150 //# A List of bugs, limitations, extensions or planned refinements.
151 // <li> Remove AipsIO argument from open and close.
152 // <li> When only 1 bucket in use addcolumn can check if there's enough
153 // room to fit the new column (so rearrange the bucket) in the free
154 // row space.
155 // </todo>
156 
157 
158 class SSMBase: public DataManager
159 {
160 public:
161  // Create a Standard storage manager with default name SSM.
162  explicit SSMBase (Int aBucketSize=0,
163  uInt aCacheSize=1);
164 
165  // Create a Standard storage manager with the given name.
166  explicit SSMBase (const String& aDataManName,
167  Int aBucketSize=0,
168  uInt aCacheSize=1);
169 
170  // Create a Standard storage manager with the given name.
171  // The specifications are part of the record (as created by dataManagerSpec).
172  SSMBase (const String& aDataManName,
173  const Record& spec);
174 
175  ~SSMBase();
176 
177  // Clone this object.
178  // It does not clone SSMColumn objects possibly used.
179  // The caller has to delete the newly created object.
180  virtual DataManager* clone() const;
181 
182  // Get the type name of the data manager (i.e. StandardStMan).
183  virtual String dataManagerType() const;
184 
185  // Get the name given to the storage manager (in the constructor).
186  virtual String dataManagerName() const;
187 
188  // Return a record containing data manager specifications.
189  virtual Record dataManagerSpec() const;
190 
191  // Get data manager properties that can be modified.
192  // It is only ActualCacheSize (the actual cache size in buckets).
193  // It is a subset of the data manager specification.
194  virtual Record getProperties() const;
195 
196  // Modify data manager properties.
197  // Only ActualCacheSize can be used. It is similar to function setCacheSize
198  // with <src>canExceedNrBuckets=False</src>.
199  virtual void setProperties (const Record& spec);
200 
201  // Get the version of the class.
202  uInt getVersion() const;
203 
204  // Set the cache size (in buckets).
205  // If <src>canExceedNrBuckets=True</src>, the given cache size can be
206  // larger than the nr of buckets in the file. In this way the cache can
207  // be made large enough for a future file extension.
208  // Otherwise, it is limited to the actual number of buckets. This is useful
209  // if one wants the entire file to be cached.
210  void setCacheSize (uInt aCacheSize, Bool canExceedNrBuckets=True);
211 
212  // Get the current cache size (in buckets).
213  uInt getCacheSize() const;
214 
215  // Clear the cache used by this storage manager.
216  // It will flush the cache as needed and remove all buckets from it.
217  void clearCache();
218 
219  // Show the statistics of all caches used.
220  virtual void showCacheStatistics (ostream& anOs) const;
221 
222  // Show statistics of all indices used.
223  void showIndexStatistics (ostream & anOs) const;
224 
225  // Show statistics of the Base offsets/index etc.
226  void showBaseStatistics (ostream & anOs) const;
227 
228  // Get the bucket size.
229  uInt getBucketSize() const;
230 
231  // Get the number of rows in this storage manager.
232  uInt getNRow() const;
233 
234  // The storage manager can add rows.
235  virtual Bool canAddRow() const;
236 
237  // The storage manager can delete rows.
238  virtual Bool canRemoveRow() const;
239 
240  // The storage manager can add columns.
241  virtual Bool canAddColumn() const;
242 
243  // The storage manager can delete columns.
244  virtual Bool canRemoveColumn() const;
245 
246  // Make the object from the type name string.
247  // This function gets registered in the DataManager "constructor" map.
248  // The caller has to delete the object.
249  static DataManager* makeObject (const String& aDataManType,
250  const Record& spec);
251 
252  // Get access to the given column.
253  SSMColumn& getColumn (uInt aColNr);
254 
255  // Get access to the given Index.
256  SSMIndex& getIndex (uInt anIdxNr);
257 
258  // Make the current bucket in the cache dirty (i.e. something has been
259  // changed in it and it needs to be written when removed from the cache).
260  // (used by SSMColumn::putValue).
261  void setBucketDirty();
262 
263  // Open (if needed) the file for indirect arrays with the given mode.
264  // Return a pointer to the object.
266 
267  // Find the bucket containing the column and row and return the pointer
268  // to the beginning of the column data in that bucket.
269  // It also fills in the start and end row for the column data.
270  char* find (uInt aRowNr, uInt aColNr,
271  uInt& aStartRow, uInt& anEndRow);
272 
273  // Add a new bucket and get its bucket number.
274  uInt getNewBucket();
275 
276  // Read the bucket (if needed) and return the pointer to it.
277  char* getBucket (uInt aBucketNr);
278 
279  // Remove a bucket from the bucket cache.
280  void removeBucket (uInt aBucketNr);
281 
282  // Get rows per bucket for the given column.
283  uInt getRowsPerBucket (uInt aColumn) const;
284 
285  // Return a pointer to the (one and only) StringHandler object.
287 
288  // <group>
289  // Callbacks for BucketCache access.
290  static char* readCallBack (void* anOwner, const char* aBucketStorage);
291  static void writeCallBack (void* anOwner, char* aBucketStorage,
292  const char* aBucket);
293  static void deleteCallBack (void*, char* aBucket);
294  static char* initCallBack (void* anOwner);
295  // </group>
296 
297 private:
298  // Copy constructor (only meant for clone function).
299  SSMBase (const SSMBase& that);
300 
301  // Assignment cannot be used.
302  SSMBase& operator= (const SSMBase& that);
303 
304  // (Re)create the index, file, and cache object.
305  // It is used when all rows are deleted from the table.
306  void recreate();
307 
308  // The data manager supports use of MultiFile.
309  virtual Bool hasMultiFileSupport() const;
310 
311  // Flush and optionally fsync the data.
312  // It returns a True status if it had to flush (i.e. if data have changed).
313  virtual Bool flush (AipsIO&, Bool doFsync);
314 
315  // Let the storage manager create files as needed for a new table.
316  // This allows a column with an indirect array to create its file.
317  virtual void create (uInt aNrRows);
318 
319  // Open the storage manager file for an existing table, read in
320  // the data, and let the SSMColumn objects read their data.
321  virtual void open (uInt aRowNr, AipsIO&);
322 
323  // Resync the storage manager with the new file contents.
324  // This is done by clearing the cache.
325  virtual void resync (uInt aRowNr);
326 
327  // Reopen the storage manager files for read/write.
328  virtual void reopenRW();
329 
330  // The data manager will be deleted (because all its columns are
331  // requested to be deleted).
332  // So clean up the things needed (e.g. delete files).
333  virtual void deleteManager();
334 
335  // Let the storage manager initialize itself (upon creation).
336  // It determines the bucket size and fills the index.
337  void init();
338 
339  // Determine and set the bucket size.
340  // It returns the number of rows per bucket.
342 
343  // Get the number of indices in use.
344  uInt getNrIndices() const;
345 
346  // Add rows to the storage manager.
347  // Per column it extends number of rows.
348  virtual void addRow (uInt aNrRows);
349 
350  // Delete a row from all columns.
351  virtual void removeRow (uInt aRowNr);
352 
353  // Do the final addition of a column.
354  virtual void addColumn (DataManagerColumn*);
355 
356  // Remove a column from the data file.
357  virtual void removeColumn (DataManagerColumn*);
358 
359  // Create a column in the storage manager on behalf of a table column.
360  // The caller has to delete the newly created object.
361  // <group>
362  // Create a scalar column.
363  virtual DataManagerColumn* makeScalarColumn (const String& aName,
364  int aDataType,
365  const String& aDataTypeID);
366  // Create a direct array column.
367  virtual DataManagerColumn* makeDirArrColumn (const String& aName,
368  int aDataType,
369  const String& aDataTypeID);
370  // Create an indirect array column.
371  virtual DataManagerColumn* makeIndArrColumn (const String& aName,
372  int aDataType,
373  const String& aDataTypeID);
374  // </group>
375 
376  // Get the cache object.
377  // This will construct the cache object if not present yet.
378  // The cache object will be deleted by the destructor.
380 
381  // Construct the cache object (if not constructed yet).
382  void makeCache();
383 
384  // Read the header.
385  void readHeader();
386 
387  // Read the index from its buckets.
388  void readIndexBuckets();
389 
390  // Write the header and the indices.
391  void writeIndex();
392 
393 
394  //# Declare member variables.
395  // Name of data manager.
397 
398  // The file containing the indirect arrays.
400 
401  // The number of rows in the columns.
403 
404  // Column offset
406 
407  // Row Index ID containing all the columns in a bucket
409 
410  // Will contain all indices
412 
413  // The cache with the SSM buckets.
415 
416  // The file containing all data.
418 
419  // String handler class
421 
422  // The persistent cache size.
424 
425  // The actual cache size.
427 
428  // The initial number of buckets in the cache.
430 
431  // Nr of buckets needed for index.
433 
434  // Number of the first index bucket
436 
437  // Offset of index in first bucket.
438  // If >0, the index fits in a single bucket.
440 
441  // Number of the last String Bucket
443 
444  // length of index memoryblock
446 
447  // The nr of free buckets.
449 
450  // The first free bucket.
452 
453  // The bucket size.
456 
457  // The assembly of all columns.
459 
460  // Has the data changed since the last flush?
462 };
463 
464 
466 {
467  return itsPtrIndex.nelements();
468 }
469 
471 {
472  return itsCacheSize;
473 }
474 
// Inline accessor: returns the row count held in the itsNrRows member.
475 inline uInt SSMBase::getNRow() const
476 {
477  return itsNrRows;
478 }
479 
481 {
482  return itsBucketSize;
483 }
484 
486 {
487  if (itsCache == 0) {
488  makeCache();
489  }
490  return *itsCache;
491 }
492 
494 {
495  return *(itsPtrColumn[aColNr]);
496 }
497 
// Inline accessor: dereferences the SSMIndex pointer stored at position
// anIdxNr of itsPtrIndex and returns it by reference.
// This function itself performs no range check on anIdxNr and no null check
// on the stored pointer, so the caller must pass a valid index number.
498 inline SSMIndex& SSMBase::getIndex (uInt anIdxNr)
499 {
500  return *(itsPtrIndex[anIdxNr]);
501 }
502 
504 {
505  return itsStringHandler;
506 }
507 
508 
509 
510 } //# NAMESPACE CASACORE - END
511 
512 #endif
void setCacheSize(uInt aCacheSize, Bool canExceedNrBuckets=True)
Set the cache size (in buckets).
virtual Bool canAddColumn() const
The storage manager can add columns.
void init()
Let the storage manager initialize itself (upon creation).
File object for BucketCache.
Definition: BucketFile.h:107
uInt getVersion() const
Get the version of the class.
virtual void create(uInt aNrRows)
Let the storage manager create files as needed for a new table.
int Int
Definition: aipstype.h:47
uInt setBucketSize()
Determine and set the bucket size.
The bucket index for a group of columns in the Standard Storage Manager.
Definition: SSMIndex.h:86
virtual DataManager * clone() const
Clone this object.
Cache for buckets in a part of a file.
Definition: BucketCache.h:217
Int itsFirstFreeBucket
The first free bucket.
Definition: SSMBase.h:451
static void deleteCallBack(void *, char *aBucket)
virtual void reopenRW()
Reopen the storage manager files for read/write.
BucketCache * itsCache
The cache with the SSM buckets.
Definition: SSMBase.h:414
String itsDataManName
Name of data manager.
Definition: SSMBase.h:396
virtual Bool canRemoveColumn() const
The storage manager can delete columns.
virtual Record getProperties() const
Get data manager properties that can be modified.
void removeBucket(uInt aBucketNr)
Remove a bucket from the bucket cache.
uInt getBucketSize() const
Get the bucket size.
Definition: SSMBase.h:480
AipsIO is the object persistency mechanism of Casacore.
Definition: AipsIO.h:168
Abstract base class for a column in a data manager.
Definition: DataManager.h:616
uInt itsIndexLength
length of index memoryblock
Definition: SSMBase.h:445
uInt getNrIndices() const
Get the number of indices in use.
Definition: SSMBase.h:465
uInt itsFreeBucketsNr
The nr of free buckets.
Definition: SSMBase.h:448
virtual void addRow(uInt aNrRows)
Add rows to the storage manager.
uInt getNewBucket()
Add a new bucket and get its bucket number.
virtual DataManagerColumn * makeScalarColumn(const String &aName, int aDataType, const String &aDataTypeID)
Create a column in the storage manager on behalf of a table column.
uInt itsBucketRows
Definition: SSMBase.h:455
SSMBase & operator=(const SSMBase &that)
Assignment cannot be used.
Bool isDataChanged
Has the data changed since the last flush?
Definition: SSMBase.h:461
static char * initCallBack(void *anOwner)
virtual void removeColumn(DataManagerColumn *)
Remove a column from the data file.
void readHeader()
Read the header.
StManArrayFile * itsIosFile
The file containing the indirect arrays.
Definition: SSMBase.h:399
Base class of the Standard Storage Manager.
Definition: SSMBase.h:158
void setBucketDirty()
Make the current bucket in the cache dirty (i.e.
virtual void showCacheStatistics(ostream &anOs) const
Show the statistics of all caches used.
virtual void addColumn(DataManagerColumn *)
Do the final addition of a column.
void readIndexBuckets()
Read the index from its buckets.
void recreate()
(Re)create the index, file, and cache object.
PtrBlock< SSMColumn * > itsPtrColumn
The assembly of all columns.
Definition: SSMBase.h:458
Store strings in the Standard Storage Manager.
void clearCache()
Clear the cache used by this storage manager.
Int itsFirstIdxBucket
Number of the first index bucket.
Definition: SSMBase.h:435
uInt itsIdxBucketOffset
Offset of index in first bucket.
Definition: SSMBase.h:439
SSMStringHandler * getStringHandler()
Return a pointer to the (one and only) StringHandler object.
Definition: SSMBase.h:503
uInt getRowsPerBucket(uInt aColumn) const
Get rows per bucket for the given column.
char * find(uInt aRowNr, uInt aColNr, uInt &aStartRow, uInt &anEndRow)
Find the bucket containing the column and row and return the pointer to the beginning of the column d...
virtual void open(uInt aRowNr, AipsIO &)
Open the storage manager file for an existing table, read in the data, and let the SSMColumn objects ...
void showBaseStatistics(ostream &anOs) const
Show statistics of the Base offsets/index etc.
static char * readCallBack(void *anOwner, const char *aBucketStorage)
Callbacks for BucketCache access.
A hierarchical collection of named fields of various types.
Definition: Record.h:181
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
virtual void deleteManager()
The data manager will be deleted (because all its columns are requested to be deleted).
uInt itsNrBuckets
The initial number of buckets in the cache.
Definition: SSMBase.h:429
uInt itsBucketSize
The bucket size.
Definition: SSMBase.h:454
Block< uInt > itsColIndexMap
Row Index ID containing all the columns in a bucket.
Definition: SSMBase.h:408
A Column in the Standard Storage Manager.
Definition: SSMColumn.h:99
virtual Bool hasMultiFileSupport() const
The data manager supports use of MultiFile.
virtual DataManagerColumn * makeDirArrColumn(const String &aName, int aDataType, const String &aDataTypeID)
Create a direct array column.
A drop-in replacement for Block<T*>.
Definition: Block.h:861
virtual String dataManagerType() const
Get the type name of the data manager (i.e.
SSMIndex & getIndex(uInt anIdxNr)
Get access to the given Index.
Definition: SSMBase.h:498
uInt getNRow() const
Get the number of rows in this storage manager.
Definition: SSMBase.h:475
SSMColumn & getColumn(uInt aColNr)
Get access to the given column.
Definition: SSMBase.h:493
BucketFile * itsFile
The file containing all data.
Definition: SSMBase.h:417
virtual Record dataManagerSpec() const
Record a record containing data manager specifications.
static DataManager * makeObject(const String &aDataManType, const Record &spec)
Make the object from the type name string.
uInt itsNrRows
The number of rows in the columns.
Definition: SSMBase.h:402
OpenOption
Define the possible ByteIO open options.
Definition: ByteIO.h:65
virtual Bool flush(AipsIO &, Bool doFsync)
Flush and optionally fsync the data.
Int itsLastStringBucket
Number of the first String Bucket.
Definition: SSMBase.h:442
Abstract base class for a data manager.
Definition: DataManager.h:222
virtual String dataManagerName() const
Get the name given to the storage manager (in the constructor).
virtual void removeRow(uInt aRowNr)
Delete a row from all columns.
BucketCache & getCache()
Get the cache object.
Definition: SSMBase.h:485
uInt itsNrIdxBuckets
Nr of buckets needed for index.
Definition: SSMBase.h:432
Block< uInt > itsColumnOffset
Column offset.
Definition: SSMBase.h:405
uInt itsPersCacheSize
The persistent cache size.
Definition: SSMBase.h:423
virtual DataManagerColumn * makeIndArrColumn(const String &aName, int aDataType, const String &aDataTypeID)
Create an indirect array column.
virtual Bool canAddRow() const
The storage manager can add rows.
String: the storage and methods of handling collections of characters.
Definition: String.h:223
void makeCache()
Construct the cache object (if not constructed yet).
virtual void setProperties(const Record &spec)
Modify data manager properties.
virtual void resync(uInt aRowNr)
Resync the storage manager with the new file contents.
void writeIndex()
Write the header and the indices.
char * getBucket(uInt aBucketNr)
Read the bucket (if needed) and return the pointer to it.
uInt itsCacheSize
The actual cache size.
Definition: SSMBase.h:426
StManArrayFile * openArrayFile(ByteIO::OpenOption anOpt)
Open (if needed) the file for indirect arrays with the given mode.
static void writeCallBack(void *anOwner, char *aBucketStorage, const char *aBucket)
void showIndexStatistics(ostream &anOs) const
Show statistics of all indices used.
PtrBlock< SSMIndex * > itsPtrIndex
Will contain all indices.
Definition: SSMBase.h:411
Read/write array in external format for a storage manager.
Definition: StArrayFile.h:129
const Bool True
Definition: aipstype.h:40
this file contains all the compiler specific defines
Definition: mainpage.dox:28
virtual Bool canRemoveRow() const
The storage manager can delete rows.
SSMStringHandler * itsStringHandler
String handler class.
Definition: SSMBase.h:420
unsigned int uInt
Definition: aipstype.h:48
SSMBase(Int aBucketSize=0, uInt aCacheSize=1)
Create a Standard storage manager with default name SSM.
uInt getCacheSize() const
Get the current cache size (in buckets).
Definition: SSMBase.h:470