casacore
FitToHalfStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 //# $Id: Array.h 21545 2015-01-22 19:36:35Z gervandiepen $
26 
27 #ifndef SCIMATH_FITTOHALFSTATISTICS_H
28 #define SCIMATH_FITTOHALFSTATISTICS_H
29 
30 #include <casacore/casa/aips.h>
31 
32 #include <casacore/scimath/Mathematics/ConstrainedRangeStatistics.h>
33 #include <casacore/scimath/Mathematics/FitToHalfStatisticsData.h>
34 
35 namespace casacore {
36 
37 // Class to calculate statistics using the so-called fit to half algorithm. In this
38 // algorithm, a center value is specified, and only points greater or equal or less or equal
39 // this value are included. Furthermore, each of the included points is reflected about
40 // the center value, and these virtual points are added to the included points and
41 // the union of sets of included real points and virtual points are used for computing statistics.
42 // The specified center point is therefore the mean and median of the resulting
43 // distribution, and the total number of points is exactly twice the number of real
44 // data points that are included.
45 
46 template <class AccumType, class DataIterator, class MaskIterator=const Bool *, class WeightsIterator=DataIterator>
48  : public ConstrainedRangeStatistics<CASA_STATP> {
49 public:
50 
51  const static AccumType TWO;
52 
53  // <src>value</src> is only used if <src>center</src>=CVALUE
57  AccumType value=0
58  );
59 
60  virtual ~FitToHalfStatistics();
61 
62  // copy semantics
65  );
66 
67  // get the algorithm that this object uses for computing stats
70  };
71 
72  // The median is just the center value, so none of the parameters to this method are used.
73  AccumType getMedian(
74  CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
75  CountedPtr<AccumType> knownMax=NULL, uInt binningThreshholdSizeBytes=4096*4096,
76  Bool persistSortedArray=False, uInt64 nBins=10000
77  );
78 
79  // <group>
80  // In the following group of methods, if the size of the composite dataset
81  // is smaller than
82  // <src>binningThreshholdSizeBytes</src>, the composite dataset
83  // will be (perhaps partially) sorted and persisted in memory during the
84  // call. In that case, and if <src>persistSortedArray</src> is True, this
85  // sorted array will remain in memory after the call and will be used on
86  // subsequent calls of this method when <src>binningThreshholdSizeBytes</src>
87  // is greater than the size of the composite dataset. If
88  // <src>persistSortedArray</src> is False, the sorted array will not be
89  // stored after this call completes and so any subsequent calls for which the
90  // dataset size is less than <src>binningThreshholdSizeBytes</src>, the
91  // dataset will be sorted from scratch. Values which are not included due to
92  // non-unity strides, are not included in any specified ranges, are masked,
93  // or have associated weights of zero are not considered as dataset members
94  // for quantile computations.
95  // If one has a priori information regarding
96  // the number of points (npts) and/or the minimum and maximum values of the data
97  // set, these can be supplied to improve performance. Note however, that if these
98  // values are not correct, the resulting median
99  // and/or quantile values will also not be correct (although see the following notes regarding
100  // max/min). Note that if this object has already had getStatistics()
101  // called, and the min and max were calculated, there is no need to pass these values in
102  // as they have been stored internally and used (although passing them in shouldn't hurt
103  // anything). If provided, npts, the number of points falling in the specified ranges which are
104  // not masked and have weights > 0, should be exactly correct. <src>min</src> can be less than
105  // the true minimum, and <src>max</src> can be greater than the True maximum, but for best
106  // performance, these should be as close to the actual min and max as possible.
107 
108  AccumType getMedianAndQuantiles(
109  std::map<Double, AccumType>& quantiles, const std::set<Double>& fractions,
110  CountedPtr<uInt64> knownNpts=NULL, CountedPtr<AccumType> knownMin=NULL,
111  CountedPtr<AccumType> knownMax=NULL,
112  uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
113  uInt64 nBins=10000
114  );
115 
116  // get the median of the absolute deviation about the median of the data.
117  AccumType getMedianAbsDevMed(
118  CountedPtr<uInt64> knownNpts=NULL,
119  CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
120  uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
121  uInt64 nBins=10000
122  );
123 
124  // Get the specified quantiles. <src>fractions</src> must be between 0 and 1,
125  // noninclusive.
126  std::map<Double, AccumType> getQuantiles(
127  const std::set<Double>& fractions, CountedPtr<uInt64> knownNpts=NULL,
128  CountedPtr<AccumType> knownMin=NULL, CountedPtr<AccumType> knownMax=NULL,
129  uInt binningThreshholdSizeBytes=4096*4096, Bool persistSortedArray=False,
130  uInt64 nBins=10000
131  );
132  // </group>
133 
134  // scan the dataset(s) that have been added, and find the min and max.
135  // This method may be called even if setStatsToCaclulate has been called and
136  // MAX and MIN has been excluded.
137  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
138 
139  // scan the dataset(s) that have been added, and find the number of good points.
140  // This method may be called even if setStatsToCaclulate has been called and
141  // NPTS has been excluded. If setCalculateAsAdded(True) has previously been
142  // called after this object has been (re)initialized, an exception will be thrown.
143  uInt64 getNPts();
144 
145  // reset object to initial state. Clears all private fields including data,
146  // accumulators, global range. It does not affect the fence factor (_f), which was
147  // set at object construction.
148  virtual void reset();
149 
150  // This class does not allow statistics to be calculated as datasets are added, so
151  // an exception will be thrown if <src>c</src> is True.
152  void setCalculateAsAdded(Bool c);
153 
154 protected:
155 
156  virtual void _clearData();
157 
159 
161 
162  inline const StatsData<AccumType>& _getStatsData() const { return _statsData; }
163 
164  // <group>
165  // no weights, no mask, no ranges
166  void _unweightedStats(
167  uInt64& ngood, AccumType& mymin, AccumType& mymax,
168  Int64& minpos, Int64& maxpos,
169  const DataIterator& dataBegin, Int64 nr, uInt dataStride
170  );
171 
172  // no weights, no mask
173  void _unweightedStats(
174  uInt64& ngood, AccumType& mymin, AccumType& mymax,
175  Int64& minpos, Int64& maxpos,
176  const DataIterator& dataBegin, Int64 nr, uInt dataStride,
177  const DataRanges& ranges, Bool isInclude
178  );
179 
180  void _unweightedStats(
181  uInt64& ngood, AccumType& mymin, AccumType& mymax,
182  Int64& minpos, Int64& maxpos,
183  const DataIterator& dataBegin, Int64 nr, uInt dataStride,
184  const MaskIterator& maskBegin, uInt maskStride
185  );
186 
187  void _unweightedStats(
188  uInt64& ngood, AccumType& mymin, AccumType& mymax,
189  Int64& minpos, Int64& maxpos,
190  const DataIterator& dataBegin, Int64 nr, uInt dataStride,
191  const MaskIterator& maskBegin, uInt maskStride,
192  const DataRanges& ranges, Bool isInclude
193  );
194  // </group>
195 
196  virtual void _updateMaxMin(
197  AccumType mymin, AccumType mymax, Int64 minpos,
198  Int64 maxpos, uInt dataStride, const Int64& currentDataset
199  );
200 
201  // <group>
202  // has weights, but no mask, no ranges
203  void _weightedStats(
204  AccumType& mymin, AccumType& mymax,
205  Int64& minpos, Int64& maxpos,
206  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
207  Int64 nr, uInt dataStride
208  );
209 
210  void _weightedStats(
211  AccumType& mymin, AccumType& mymax,
212  Int64& minpos, Int64& maxpos,
213  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
214  Int64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
215  );
216 
217  void _weightedStats(
218  AccumType& mymin, AccumType& mymax,
219  Int64& minpos, Int64& maxpos,
220  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
221  Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
222  );
223 
224  void _weightedStats(
225  AccumType& mymin, AccumType& mymax,
226  Int64& minpos, Int64& maxpos,
227  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
228  Int64 nr, uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
229  const DataRanges& ranges, Bool isInclude
230  );
231  // </group>
232 
233 private:
236  AccumType _centerValue;
239  // these are the max and min for the real portion of the dataset
241 
242  void _getRealMinMax(
243  CountedPtr<AccumType>& realMin, CountedPtr<AccumType>& realMax,
245  );
246 
247  void _setRange();
248 };
249 
250 }
251 
252 #ifndef CASACORE_NO_AUTO_TEMPLATES
253 #include <casacore/scimath/Mathematics/FitToHalfStatistics.tcc>
254 #endif //# CASACORE_NO_AUTO_TEMPLATES
255 
256 #endif
virtual void reset()
reset object to initial state.
AccumType getMedian(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
The median is just the center value, so none of the parameters to this method are used...
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
StatsData< AccumType > & _getStatsData()
retreive stats structure.
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
void _weightedStats(AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, Int64 nr, uInt dataStride)
has weights, but no mask, no ranges
unsigned long long uInt64
Definition: aipsxtype.h:39
StatsData< AccumType > _statsData
void _unweightedStats(uInt64 &ngood, AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const DataIterator &dataBegin, Int64 nr, uInt dataStride)
no weights, no mask, no ranges
USE_DATA
which section of data to use, greater than or less than the center value
FitToHalfStatistics(FitToHalfStatisticsData::CENTER center=FitToHalfStatisticsData::CMEAN, FitToHalfStatisticsData::USE_DATA useData=FitToHalfStatisticsData::LE_CENTER, AccumType value=0)
value is only used if center=CVALUE
FitToHalfStatisticsData::CENTER _centerType
const StatsData< AccumType > & _getStatsData() const
ALGORITHM
implemented algorithms
StatsData< AccumType > _getStatistics()
CENTER
choice of center point based on the corresponding statistics from the entire distribution of data...
AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
CountedPtr< AccumType > _realMin
Referenced counted pointer for constant data.
Definition: CountedPtr.h:86
void _getRealMinMax(CountedPtr< AccumType > &realMin, CountedPtr< AccumType > &realMax, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax)
virtual void _updateMaxMin(AccumType mymin, AccumType mymax, Int64 minpos, Int64 maxpos, uInt dataStride, const Int64 &currentDataset)
void setCalculateAsAdded(Bool c)
This class does not allow statistics to be calculated as datasets are added, so an exception will be ...
Class to calculate statistics using the so-called fit to half algorithm.
#define DataRanges
Commonly used types in statistics framework.
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
Abstract base class for statistics algorithms which are characterized by a range of good values...
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
const Bool False
Definition: aipstype.h:41
std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
Get the specified quantiles.
CountedPtr< AccumType > _realMax
these are the max and min for the real portion of the dataset
const Double c
Fundamental physical constants (SI units):
FitToHalfStatistics< CASA_STATP > & operator=(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
get the median of the absolute deviation about the median of the data.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
LatticeExprNode value(const LatticeExprNode &expr)
This function returns the value of the expression without a mask.
unsigned int uInt
Definition: aipstype.h:48