casacore
IncrementalStMan.h
Go to the documentation of this file.
1//# IncrementalStMan.h: The Incremental Storage Manager
2//# Copyright (C) 1996,1997,1999
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//# $Id$
27
28#ifndef TABLES_INCREMENTALSTMAN_H
29#define TABLES_INCREMENTALSTMAN_H
30
31//# Includes
32#include <casacore/casa/aips.h>
33#include <casacore/tables/DataMan/ISMBase.h>
34
35
36namespace casacore { //# NAMESPACE CASACORE - BEGIN
37
38// <summary>
39// The Incremental Storage Manager
40// </summary>
41
42// <use visibility=export>
43
44// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tIncrementalStMan.cc">
45// </reviewed>
46
47// <prerequisite>
48//# Classes you should understand before using this one.
49// <li> The Table Data Managers concept as described in module file
50// <linkto module="Tables:Data Managers">Tables.h</linkto>
51// <li> <linkto class=ROIncrementalStManAccessor>
52// ROIncrementalStManAccessor</linkto>
53// for a discussion of the cache size
54// </prerequisite>
55
56// <etymology>
57// IncrementalStMan is the data manager storing values in an incremental way
58// (similar to an incremental backup). A value is only stored when it
59// differs from the previous value.
60// </etymology>
61
62// <synopsis>
63// IncrementalStMan stores the data in a way that a value is only stored
64// when it is different from the value in the previous row. This storage
65// manager is very well suited for columns with slowly changing values,
66// because the resulting file can be much smaller. It is not suited at
67// all for columns with continuously changing data.
68// <p>
69// In general it can be advantageous to use this storage manager when
70// a value changes at most every 4 rows (although it depends on the length
71// of the data values themselves). The following simple example
72// shows the approximate savings that can be achieved when storing a column
73// with double values changing every CH rows.
74// <srcblock>
75// #rows CH normal length ISM length compress ratio
76// 50000 5 4000000 1606000 2.5
77// 50000 50 4000000 164000 24.5
78// 50000 500 4000000 32800 122
79// </srcblock>
80// There is a special test program <src>nISMBucket</src> in the Tables module
81// doing a simple, but usually adequate, simulation of the amount of
82// storage needed for a scenario.
83// <p>
84// IncrementalStMan stores the values (and associated indices) in
85// fixed-length buckets. A <linkto class=BucketCache>BucketCache</linkto>
86// object is used to read/write
87// the buckets. The default cache size is 1 bucket (which is fine for
88// sequential access), but for random access it can make sense to
89// increase the size of the cache. This can be done using
90// the class <linkto class=ROIncrementalStManAccessor>
91// ROIncrementalStManAccessor</linkto>.
92// <p>
93// The IncrementalStMan can hold values of any standard data type (thus
94// from Bool to String). It can handle scalars, direct and indirect
95// arrays. It can support an arbitrary number of columns. The values in
96// each of them can vary at its own speed.
97// <br>
98// A bucket contains the values of several consecutive rows.
99// At the beginning of a bucket the values of the starting row of all
100// columns for this storage manager are repeated. In this way the value
101// of a cell can always be found in the bucket and no references
102// to previous buckets are needed.
103// <br>A bucket should be big enough to hold all starting values and
104// a reasonable number of other values. As a rule of thumb it should be
105// big enough to hold at least 100 values of each column. In general the
106// default bucket size will do. Only in special cases (e.g. when storing
107// large variable length strings) should the bucket size be set explicitly.
108// Giving a zero bucket size means that a suitable default bucket size
109// will be calculated.
110// <br>
111// When a table is filled sequentially each bucket can be filled as
112// much as possible. When writing in a random way, buckets can contain
113// some unused space, because a bucket in the middle of the file
114// has to be split when a new value has to be put in it.
115// <p>
116// Each column in the IncrementalStMan has the following properties to
117// achieve the "store-different-values-only" behaviour.
118// <ul>
119// <li> When a row is not explicitly put, it has the same value as the
120// previous row.
121// The first row gets the standard undefined values when not put.
122// The order of puts and addRows is not important.
123// <br>E.g. when a table has N rows and row N and the following M rows
124// have the same value, the following schematic code has the same effect:
125// <br><src> add 1 row; put value in row N; add M rows;</src>
126// <br><src> add M+1 rows; put value in row N;</src>
127// <li> When putting a scalar or direct array, it is tested if it matches
128// the previous row. If so, it is not stored again.
129// This test is not done for indirect arrays, because those can
130// be (very) big and it would be too time-consuming. So the only
131// way to save space for indirect arrays is by not putting them
132// as explained in the previous item.
133// <li> For indirect arrays the buckets contain a pointer only. The
134// arrays themselves are stored in a separate file.
135// <li> When a value of an existing row is updated, only that one row is
136// updated. The next row(s) keep their value, even if it was
137// shared with the row being updated.
138// <br>For scalars and direct arrays it will be tested if the
139// new value matches the value in the previous and/or next row.
140// If so, those rows will be combined to save storage.
141// <li> The IncrementalStMan is optimized for sequential access to a table.
142// <br>- A bucket is accessed only once, because a bucket contains
143// consecutive rows.
144// <br>- For each column a copy is kept of the last value read.
145// So the value for the next rows (with that same value)
146// is immediately available.
147// <br>For random access the performance can be improved by setting
148// the cache size using class
149// <linkto class=ROIncrementalStManAccessor>
150// ROIncrementalStManAccessor</linkto>.
151// </ul>
152//
153// <note>This class contains many public functions which are only used
154// by other ISM classes. The only useful function for the user is the
155// constructor.
156// </note>
157
158// <motivation>
159// IncrementalStMan can save a lot of storage space.
160// Unlike the old StManMirAIO it stores the values directly in the
161// file to save on memory usage.
162// </motivation>
163
164// <example>
165// This example shows how to create a table and how to attach
166// the storage manager to some columns.
167// <srcblock>
168// SetupNewTable newtab("name.data", tableDesc, Table::New);
169// IncrementalStMan stman; // define storage manager
170// newtab.bindColumn ("column1", stman); // bind column to st.man.
171// newtab.bindColumn ("column2", stman); // bind column to st.man.
172// Table tab(newtab); // actually create table
173// </srcblock>
174// </example>
175
176//# <todo asof="$DATE:$">
177//# A List of bugs, limitations, extensions or planned refinements.
178//# </todo>
179
180
182{
183public:
184 // Create an incremental storage manager with the given name.
185 // If no name is used, it is set to an empty string.
186 // The name can be used to construct a
187 // <linkto class=ROIncrementalStManAccessor>ROIncrementalStManAccessor
188 // </linkto> object (e.g. to set the cache size).
189 // <br>
190 // The bucket size has to be given in bytes and the cache size in buckets.
191 // Bucket size 0 means that the storage manager will set the bucket
192 // size such that it can contain about 100 rows
193 // (with a minimum size of 32768 bytes). However, if that results
194 // in a very large bucket size (>327680) it'll make it smaller.
195 // Note it uses 32 bytes for the size of variable length strings,
196 // so this heuristic may fail when a column contains large strings.
197 // When <src>checkBucketSize</src> is set and Bucket size > 0
198 // the storage manager throws an exception
199 // when the size is too small to hold the values of at least 2 rows.
200 // For this check it uses 0 for the length of variable length strings.
201 // <group>
203 Bool checkBucketSize = True,
204 uInt cacheSize = 1);
206 uInt bucketSize = 0,
207 Bool checkBucketSize = True,
208 uInt cacheSize = 1);
209 // </group>
210
212
213private:
214 // Copy constructor cannot be used.
216
217 // Assignment cannot be used.
219};
220
221
222
223} //# NAMESPACE CASACORE - END
224
225#endif
uInt bucketSize() const
Get the bucket size (in bytes).
Definition: ISMBase.h:407
virtual String dataManagerName() const
Get the name given to the storage manager (in the constructor).
uInt cacheSize() const
Get the current cache size (in buckets).
Definition: ISMBase.h:392
IncrementalStMan(const IncrementalStMan &that)
Copy constructor cannot be used.
IncrementalStMan & operator=(const IncrementalStMan &that)
Assignment cannot be used.
IncrementalStMan(uInt bucketSize=0, Bool checkBucketSize=True, uInt cacheSize=1)
Create an incremental storage manager with the given name.
IncrementalStMan(const String &dataManagerName, uInt bucketSize=0, Bool checkBucketSize=True, uInt cacheSize=1)
String: the storage and methods of handling collections of characters.
Definition: String.h:225
this file contains all the compiler specific defines
Definition: mainpage.dox:28
unsigned int uInt
Definition: aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool True
Definition: aipstype.h:43