casacore
ColumnsIndexArray.h
Go to the documentation of this file.
1 //# ColumnsIndexArray.h: Index to an array column in a table
2 //# Copyright (C) 2001,2002
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_COLUMNSINDEXARRAY_H
29 #define TABLES_COLUMNSINDEXARRAY_H
30 
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
34 #include <casacore/tables/Tables/Table.h>
35 #include <casacore/casa/Arrays/Vector.h>
36 #include <casacore/casa/Containers/Block.h>
37 #include <casacore/casa/Containers/Record.h>
38 
39 namespace casacore { //# NAMESPACE CASACORE - BEGIN
40 
41 //# Forward Declarations
42 class String;
43 class TableColumn;
44 
45 
46 // <summary>
47 // Index to an array column in a table.
48 // </summary>
49 
50 // <use visibility=export>
51 
52 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndexArray.cc" demos="">
53 // </reviewed>
54 
55 // <prerequisite>
56 // <li> <linkto class=Table>Table</linkto>
57 // <li> <linkto class=Record>Record</linkto>
58 // <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto>
59 // </prerequisite>
60 
61 // <synopsis>
62 // This class makes it possible to use transient indices on top
63 // of an array column in a table in order to speed up the process of
64 // finding rows based on a given key or key range.
65 // It is similar to class <linkto class=ColumnsIndex>ColumnsIndex</linkto>
66 // which is meant for one or more scalar columns.
67 // <p>
68 // When constructing a <src>ColumnsIndexArray</src> object, one has to define
69 // which column forms the key for this index on the given
70 // <src>table</src> object.
71 // Not every data type is supported; only uChar, Short, Int, uInt, and
72 // String array columns are supported.
73 // The column can contain arrays of any shape and it can also contain
74 // empty cells. The class will probably mostly be used for vectors, as
75 // they seem to be the most logical way to hold multiple keys.
76 // <br>The data in the given column will be read, sorted,
77 // and stored in memory. When looking up a key or key range, the class
78 // will use a fast binary search on the data held in memory.
79 // <p>
80 // The <src>ColumnsIndexArray</src> object contains a
81 // <linkto class=Record>Record</linkto> object which can be used
82 // to define the key to be looked up. The record contains a field for
83 // the column in the index (with the same name and data type).
84 // The fastest way to fill the key is by creating a
85 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for
86 // the field in the record (see the example) and fill it as needed.
87 // However, one can also use the <src>Record::define</src> function,
88 // but that is slower.
89 // <br>
90 // A second record is available to define the upper key
91 // in case a key range has to be looked up. The keys can be accessed
92 // using the various <src>accessKey</src> functions.
93 // <p>
94 // When a key is defined, the <src>getRowNumbers</src> function can be
95 // used to find the table rows containing the given key (range).
96 // Function <src>getRowNumber</src> can be used to lookup a single key
97 // if all keys in the index are unique (which can be tested with the
98 // <src>isUnique</src> function).
99 // <p>
100 // Instead of using the internal records holding the keys, one can also
101 // pass one's own Record object to <src>getRowNumbers</src>.
102 // However, that will be slower, because the given Record is first copied
// to the internal record.
103 // <p>
104 // After an index is created, it is possible to change the data
105 // in the underlying columns. However, the <src>ColumnsIndexArray</src> can
106 // not detect if the column data have changed. It can only detect if
107 // the number of rows has changed. If the column data have changed,
108 // the user has to use the <src>setChanged</src> function to indicate
109 // that the column has changed.
110 // <br>If data have changed, the entire index will be recreated by
111 // rereading and resorting the data. This will be deferred
112 // until the next key lookup.
113 // </synopsis>
114 
115 // <example>
116 // Suppose one has a table with a column NAME containing vectors.
117 // <srcblock>
118 // // Open the table and make an index for the column.
119 // Table tab("my.tab");
120 // ColumnsIndexArray colInx(tab, "NAME");
121 // // Make a RecordFieldPtr for the NAME field in the index key record.
122 // // Its data type has to match the data type of the column.
123 // RecordFieldPtr<String> nameFld(colInx.accessKey(), "NAME");
124 // // Find the row for a given name.
125 // Bool found;
126 // // Fill the key field and get the row number.
127 // // NAME is a unique key, so only one row number matches.
128 // // Otherwise function getRowNumbers had to be used.
129 // *nameFld = "MYNAME";
130 // uInt rownr = colInx.getRowNumber (found);
131 // if (!found) {
132 // cout << "Name MYNAME is unknown" << endl;
133 // }
134 // // Now get a range of names and return the row numbers in ascending order.
135 // // This uses the fact that the 'unique' argument also sorts the data.
136 // RecordFieldPtr<String> nameUpp(colInx.accessUpperKey(), "NAME");
137 // *nameFld = "LOWER";
138 // *nameUpp = "UPPER";
139 // Vector<uInt> rownrs = colInx.getRowNumbers (True, True, True);
140 // </srcblock>
// </example>
141 
142 // <motivation>
143 // Bob Garwood needed such a class.
144 // </motivation>
145 
146 
148 {
149 public:
150  // Create an index on the given table for the given column.
151  // The column has to be an array column; only uChar, Short, Int, uInt,
152  // and String array columns are supported (see the class synopsis).
153  // There is no option to skip the sort step: the column data are read
154  // and sorted when the index is built.
155  ColumnsIndexArray (const Table&, const String& columnName);
156 
157  // Copy constructor (copy semantics).
158  ColumnsIndexArray (const ColumnsIndexArray& that);
159 
161 
162  // Assignment (copy semantics).
164 
165  // Are all keys in the index unique?
166  Bool isUnique() const;
167 
168  // Return the name of the column forming the index.
169  const String& columnName() const;
170 
171  // Get the table for which this index is created.
172  const Table& table() const;
173 
174  // Something has changed in the table, so the index has to be recreated.
175  // The 2nd version indicates that a specific column has changed,
176  // so only that column might need to be reread. If that column is not
177  // part of the index, nothing will be done.
178  // <br>Note that the class itself is keeping track if the number of
179  // rows in the table changes.
180  // <group>
181  void setChanged();
182  void setChanged (const String& columnName);
183  // </group>
184 
185  // Access the key values.
186  // These functions allow you to create RecordFieldPtr<T> objects
187  // for each field in the key. In this way you can quickly fill in
188  // the key.
189  // <br>The records have a fixed type, so you cannot add or delete fields.
190  // <br>Note that <src>accessKey</src> and <src>accessLowerKey</src>
191  // are synonyms; they return the same underlying record.
192  // <group>
193  Record& accessKey();
196  // </group>
197 
198  // Find the row number matching the key. All keys have to be unique,
199  // otherwise an exception is thrown.
200  // If no match is found, <src>found</src> is set to False.
201  // The 2nd version makes it possible to pass in your own Record
202  // instead of using the internal record via the <src>accessKey</src>
203  // functions. Note that the given Record will be copied to the internal
204  // record, thus overwriting it.
205  // <group>
206  uInt getRowNumber (Bool& found);
207  uInt getRowNumber (Bool& found, const Record& key);
208  // </group>
209 
210  // Find the row numbers matching the key. It should be used instead
211  // of <src>getRowNumber</src> if the same key can exist multiple times.
212  // The 2nd version makes it possible to pass in your own Record
213  // instead of using the internal record via the <src>accessKey</src>
214  // functions. Note that the given Record will be copied to the internal
215  // record, thus overwriting it.
216  // <br>A row can contain multiple equal values. In such a case the
217  // same row number can occur multiple times in the output vector,
218  // unless <src>unique</src> is set to True. Note that making the row
219  // numbers unique implies a sort, so it can also be used to get the
220  // row numbers in ascending order.
221  // <group>
223  Vector<uInt> getRowNumbers (const Record& key, Bool unique=False);
224  // </group>
225 
226  // Find the row numbers matching the key range. The boolean arguments
227  // tell if the lower and upper key are part of the range.
228  // The 2nd version makes it possible to pass in your own Records
229  // instead of using the internal records via the
230  // <src>accessLower/UpperKey</src> functions.
231  // Note that the given Records will be copied to the internal
232  // records, thus overwriting them.
233  // <br>A row can contain multiple matching values. In such a case the
234  // same row number can occur multiple times in the output vector,
235  // unless <src>unique</src> is set to True. Note that making the row
236  // numbers unique implies a sort, so it can also be used to get the
237  // row numbers in ascending order.
238  // <group>
239  Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive,
240  Bool unique=False);
241  Vector<uInt> getRowNumbers (const Record& lower, const Record& upper,
242  Bool lowerInclusive, Bool upperInclusive,
243  Bool unique=False);
244  // </group>
245 
246 protected:
247  // Copy that object to this.
248  void copy (const ColumnsIndexArray& that);
249 
250  // Delete all data in the object.
251  void deleteObjects();
252 
253  // Add the index column to the record description for the keys.
254  // The resulting key field has the same name and data type as the
255  // column.
256  void addColumnToDesc (RecordDesc& description,
257  const TableColumn& column);
258 
259  // Make the various internal <src>RecordFieldPtr</src> objects.
260  void makeObjects (const RecordDesc& description);
261 
262  // Read the data of the columns forming the index, sort them and
263  // form the index.
264  void readData();
265 
266  // Do a binary search on <src>itsUniqueIndexArray</src> for the key in
267  // <src>fieldPtr</src>.
268  // If the key is found, <src>found</src> is set to True and the index
269  // in <src>itsUniqueIndexArray</src> is returned.
270  // If not found, <src>found</src> is set to False and the index
271  // of the next higher key is returned.
272  uInt bsearch (Bool& found, void* fieldPtr) const;
273 
274  // Compare the key in <src>fieldPtr</src> with the given index entry.
275  // -1 is returned when less, 0 when equal, 1 when greater.
276  static Int compare (void* fieldPtr,
277  void* dataPtr,
278  Int dataType,
279  Int index);
280 
281  // Fill the row numbers vector for the given start till end in the
282  // <src>itsUniqueIndexArray</src> vector (end is not inclusive).
283  // If <src>unique</src> is True, the row numbers will be made unique.
284  void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end,
285  Bool unique) const;
286 
287  // Get the data if the column is an array.
288  // <group>
289  void getArray (Vector<uChar>& result, const String& name);
290  void getArray (Vector<Short>& result, const String& name);
291  void getArray (Vector<Int>& result, const String& name);
292  void getArray (Vector<uInt>& result, const String& name);
293  void getArray (Vector<String>& result, const String& name);
294  // </group>
295 
296  // Fill the rownrs belonging to each array value.
297  void fillRownrs (uInt npts, const Block<uInt>& nrel);
298 
299 private:
306  void* itsData; //# pointer to data in itsDataVector
307  //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*.
308  //# They are used for fast access to the records.
312  Vector<uInt> itsDataIndex; //# Row numbers of all keys
313  //# Indices in itsDataIndex for each unique key
315  Block<uInt> itsRownrs; //# rownr for each value
316  uInt* itsDataInx; //# pointer to data in itsDataIndex
317  uInt* itsUniqueInx; //# pointer to data in itsUniqueIndex
318 };
319 
320 
322 {
324 }
325 inline const Table& ColumnsIndexArray::table() const
326 {
327  return itsTable; //# the table for which this index was created
328 }
330 {
331  return *itsLowerKeyPtr;
332 }
334 {
335  return *itsLowerKeyPtr;
336 }
338 {
339  return *itsUpperKeyPtr;
340 }
341 
342 
343 
344 } //# NAMESPACE CASACORE - END
345 
346 #endif
Vector< uInt > getRowNumbers(Bool unique=False)
Find the row numbers matching the key.
ColumnsIndexArray & operator=(const ColumnsIndexArray &that)
Assignment (copy semantics).
int Int
Definition: aipstype.h:47
uInt bsearch(Bool &found, void *fieldPtr) const
Do a binary search on itsUniqueIndexArray for the key in fieldPtrs.
void makeObjects(const RecordDesc &description)
Make the various internal RecordFieldPtr objects.
Main interface class to a read/write table.
Definition: Table.h:149
void fillRowNumbers(Vector< uInt > &rows, uInt start, uInt end, Bool unique) const
Fill the row numbers vector for the given start till end in the itsUniqueIndexArray vector (end is no...
Bool isUnique() const
Are all keys in the index unique?
ColumnsIndexArray(const Table &, const String &columnName)
Create an index on the given table for the given column.
uInt getRowNumber(Bool &found)
Find the row number matching the key.
Index to an array column in a table.
Record & accessKey()
Access the key values.
size_t nelements() const
How many elements does this array have? Product of all axis lengths.
Definition: ArrayBase.h:99
Description of the fields in a record object.
Definition: RecordDesc.h:105
static Int compare(void *fieldPtr, void *dataPtr, Int dataType, Int index)
Compare the key in fieldPtr with the given index entry.
A hierarchical collection of named fields of various types.
Definition: Record.h:181
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
void copy(const ColumnsIndexArray &that)
Copy that object to this.
Read/write access to a table column.
Definition: TableColumn.h:98
const Bool False
Definition: aipstype.h:41
void setChanged()
Something has changed in the table, so the index has to be recreated.
const String & columnName() const
Return the names of the columns forming the index.
void fillRownrs(uInt npts, const Block< uInt > &nrel)
Fill the rownrs belonging to each array value.
void readData()
Read the data of the columns forming the index, sort them and form the index.
const Table & table() const
Get the table for which this index is created.
String: the storage and methods of handling collections of characters.
Definition: String.h:223
void deleteObjects()
Delete all data in the object.
void getArray(Vector< uChar > &result, const String &name)
Get the data if the column is an array.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:48
void addColumnToDesc(RecordDesc &description, const TableColumn &column)
Add a column to the record description for the keys.