27 #ifndef SCIMATH_CLASSICALSTATS_H 28 #define SCIMATH_CLASSICALSTATS_H 30 #include <casacore/casa/aips.h> 32 #include <casacore/scimath/Mathematics/StatisticsAlgorithm.h> 34 #include <casacore/scimath/Mathematics/StatisticsTypes.h> 35 #include <casacore/scimath/Mathematics/StatisticsUtilities.h> 57 template <
class AccumType,
class DataIterator,
class MaskIterator=const Bool*,
class WeightsIterator=DataIterator>
137 std::map<Double, AccumType>& quantiles,
const std::set<Double>& fractions,
140 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
148 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
157 uInt binningThreshholdSizeBytes=4096*4096,
Bool persistSortedArray=
False,
167 virtual void getMinMax(AccumType& mymin, AccumType& mymax);
180 virtual void reset();
202 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
207 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
213 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
214 const MaskIterator& maskBegin,
uInt maskStride
219 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
220 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
226 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
232 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
238 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
239 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
245 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
246 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
252 AccumType& mymin, AccumType& mymax,
Int64& minpos,
Int64& maxpos,
253 const AccumType& datum ,
Int64 count
257 AccumType& mymin, AccumType& mymax,
Int64& minpos,
Int64& maxpos,
const AccumType& datum,
258 const AccumType& weight,
Int64 count
269 void _doMinMax(AccumType& vmin, AccumType& vmax);
277 vector<vector<uInt64> >& binCounts,
279 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
281 const vector<AccumType>& maxLimit
285 vector<vector<uInt64> >& binCounts,
287 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
293 vector<vector<uInt64> >& binCounts,
295 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
296 const MaskIterator& maskBegin,
uInt maskStride,
301 vector<vector<uInt64> >& binCounts,
303 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
304 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
310 vector<vector<uInt64> >& binCounts,
312 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
318 vector<vector<uInt64> >& binCounts,
320 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
326 vector<vector<uInt64> >& binCounts,
328 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
329 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
335 vector<vector<uInt64> >& binCounts,
337 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
338 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
356 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
361 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
367 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
368 const MaskIterator& maskBegin,
uInt maskStride
373 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
374 const MaskIterator& maskBegin,
uInt maskStride,
const DataRanges& ranges,
380 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
386 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
392 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
393 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
399 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
400 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
408 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
413 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
418 vector<AccumType>& ary,
const DataIterator& dataBegin,
419 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
425 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
426 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
432 vector<AccumType>& ary,
const DataIterator& dataBegin,
433 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride
438 vector<AccumType>& ary,
const DataIterator& dataBegin,
439 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
445 vector<AccumType>& ary,
const DataIterator& dataBegin,
446 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
447 const MaskIterator& maskBegin,
uInt maskStride
452 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
453 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
465 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
466 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
471 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
Int64 nr,
473 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
477 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
478 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
480 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
485 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
Int64 nr,
486 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
488 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
493 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
494 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
495 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
500 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
501 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
503 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
508 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
509 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
510 const MaskIterator& maskBegin,
uInt maskStride,
511 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
516 vector<vector<AccumType> >& arys,
uInt& currentCount,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
517 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
519 const vector<std::pair<AccumType, AccumType> > &includeLimits,
uInt maxCount
526 vector<AccumType>& ary,
const DataIterator& dataBegin,
532 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
539 vector<AccumType>& ary,
const DataIterator& dataBegin,
540 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
546 vector<AccumType>& ary,
const DataIterator& dataBegin,
Int64 nr,
547 uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
553 vector<AccumType>& ary,
const DataIterator& dataBegin,
554 const WeightsIterator& weightBegin,
Int64 nr,
uInt dataStride,
560 vector<AccumType>& ary,
const DataIterator& dataBegin,
561 const WeightsIterator& weightsBegin,
Int64 nr,
uInt dataStride,
567 vector<AccumType>& ary,
const DataIterator& dataBegin,
568 const WeightsIterator& weightBegin,
Int64 nr,
569 uInt dataStride,
const MaskIterator& maskBegin,
575 vector<AccumType>& ary,
const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
576 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
585 uInt64& ngood, AccumType& mymin, AccumType& mymax,
587 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride
592 uInt64& ngood, AccumType& mymin, AccumType& mymax,
594 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
600 uInt64& ngood, AccumType& mymin, AccumType& mymax,
602 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
603 const MaskIterator& maskBegin,
uInt maskStride
608 uInt64& ngood, AccumType& mymin, AccumType& mymax,
610 const DataIterator& dataBegin,
Int64 nr,
uInt dataStride,
611 const MaskIterator& maskBegin,
uInt maskStride,
619 AccumType& mymin, AccumType& mymax,
621 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
626 AccumType& mymin, AccumType& mymax,
628 const DataIterator& dataBegin,
const WeightsIterator& weightsBegin,
633 AccumType& mymin, AccumType& mymax,
635 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
636 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride
640 AccumType& mymin, AccumType& mymax,
642 const DataIterator& dataBegin,
const WeightsIterator& weightBegin,
643 Int64 nr,
uInt dataStride,
const MaskIterator& maskBegin,
uInt maskStride,
655 mutable typename vector<DataIterator>::const_iterator
_dend,
_diter;
656 mutable vector<Int64>::const_iterator
_citer;
658 mutable std::map<uInt, MaskIterator>
_masks;
689 vector<AccumType>&
array 693 vector<vector<AccumType> >& arrays,
694 const vector<std::pair<AccumType, AccumType> > &includeLimits,
706 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
710 const vector<uInt64>& binNpts,
uInt maxArraySize,
711 const vector<std::pair<AccumType, AccumType> >& binLimits,
712 const vector<std::set<uInt64> >& dataIndices,
uInt64 nBins
721 const std::set<uInt64>& dataIndices,
Bool persistSortedArray,
755 AccumType mymin, AccumType mymax,
Int64 minpos,
766 const std::set<uInt64>& indices,
uInt maxArraySize,
767 Bool persistSortedArray
773 #ifndef CASACORE_NO_AUTO_TEMPLATES 774 #include <casacore/scimath/Mathematics/ClassicalStatistics.tcc> 775 #endif //# CASACORE_NO_AUTO_TEMPLATES void _doMinMax(AccumType &vmin, AccumType &vmax)
scan dataset(s) to find min and max
vector< DataIterator >::const_iterator _dend
mutables, used to mitigate repeated code
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
virtual void _updateMaxMin(AccumType mymin, AccumType mymax, Int64 minpos, Int64 maxpos, uInt dataStride, const Int64 &currentDataset)
update min and max if necessary
LatticeExprNode median(const LatticeExprNode &expr)
AccumType _getStatistic(StatisticsData::STATS stat)
ClassicalStatistics< AccumType, DataIterator, MaskIterator, WeightsIterator > & operator=(const ClassicalStatistics< AccumType, DataIterator, MaskIterator, WeightsIterator > &other)
copy semantics
vector< DataIterator >::const_iterator _diter
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
StatsData< AccumType > _getStatistics()
virtual StatsData< AccumType > & _getStatsData()
retrieve stats structure.
TableExprNode array(const TableExprNode &values, const TableExprNodeSet &shape)
Create an array of the given shape and fill it with the values.
void _createDataArray(vector< AccumType > &array)
Create an unsorted array of the complete data set.
virtual void _weightedStats(AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, Int64 nr, uInt dataStride)
has weights, but no mask, no ranges
unsigned long long uInt64
std::set< uInt64 > _medianIndices(CountedPtr< uInt64 > knownNpts)
If input set has one value, that is the median, if it has two, the median is the average of those...
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
std::map< uInt, DataRanges > _ranges
void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
Abstract base class which defines interface for providing "datasets" to the statistics framework when...
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
vector< vector< uInt64 > > _binCounts(vector< CountedPtr< AccumType > > &sameVal, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc)
tally the number of data points that fall into each bin provided by binDesc Any points that are less ...
std::map< uInt, WeightsIterator > _weights
virtual void _unweightedStats(uInt64 &ngood, AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const DataIterator &dataBegin, Int64 nr, uInt dataStride)
no weights, no mask, no ranges
WeightsIterator _myWeights
ALGORITHM
implemented algorithms
virtual const StatsData< AccumType > & _getStatsData() const
void _addData()
Allows derived classes to do things after data is set or added.
Reference-counted pointer for constant data.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
get the median of the absolute deviation about the median of the data.
std::map< uInt, Bool > _isIncludeRanges
virtual void _findBins(vector< vector< uInt64 > > &binCounts, vector< CountedPtr< AccumType > > &sameVal, vector< Bool > &allSame, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, const vector< AccumType > &maxLimit) const
Get the counts of data within the specified histogram bins.
static void _convertToAbsDevMedArray(vector< AccumType > &myArray, AccumType median)
convert in place by taking the absolute value of the difference of the vector and the median ...
void _createDataArrays(vector< vector< AccumType > > &arrays, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt maxCount)
#define DataRanges
Commonly used types in statistics framework.
bool Bool
Define the standard types used by Casacore.
virtual Bool _populateTestArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride, uInt maxElements) const
no weights, no mask, no ranges
virtual void setCalculateAsAdded(Bool c)
Should statistics be updated with calls to addData or should they only be calculated upon calls to ge...
void setDataProvider(StatsDataProvider< AccumType, DataIterator, MaskIterator, WeightsIterator > *dataProvider)
An exception will be thrown if setCalculateAsAdded(True) has been called.
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
Get the specified quantiles.
std::map< uInt, MaskIterator > _masks
static void _makeBins(typename StatisticsUtilities< AccumType >::BinDesc &bins, AccumType minData, AccumType maxData, uInt maxBins, Bool allowPad)
If allowPad is True, then pad the lower side of the lowest bin and the higher side of the highest bin...
virtual ~ClassicalStatistics()
vector< uInt >::const_iterator _dsiter
void _accumulate(AccumType &mymin, AccumType &mymax, Int64 &minpos, Int64 &maxpos, const AccumType &datum, Int64 count)
vector< std::map< uInt64, AccumType > > _dataFromMultipleBins(const vector< typename StatisticsUtilities< AccumType >::BinDesc > &binDesc, uInt maxArraySize, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
extract data from multiple histograms given by binDesc.
StatsData< AccumType > _statsData
virtual void reset()
reset object to initial state.
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
vector< Int64 >::const_iterator _citer
const Double c
Fundamental physical constants (SI units):
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=NULL, CountedPtr< AccumType > knownMin=NULL, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt64 nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
std::map< uInt64, AccumType > _indicesToValues(CountedPtr< uInt64 > knownNpts, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax, uInt maxArraySize, const std::set< uInt64 > &dataIndices, Bool persistSortedArray, uInt64 nBins)
get the values for the specified indices in the sorted array of all good data
Bool _isNptsSmallerThan(vector< AccumType > &arrayToSort, uInt maxArraySize)
Determine by scanning the dataset if the number of good points is smaller than maxArraySize.
Bool _valuesFromSortedArray(std::map< uInt64, AccumType > &values, CountedPtr< uInt64 > knownNpts, const std::set< uInt64 > &indices, uInt maxArraySize, Bool persistSortedArray)
get values from sorted array if the array is small enough to be held in memory.
virtual void _populateArray(vector< AccumType > &ary, const DataIterator &dataBegin, Int64 nr, uInt dataStride) const
populate an unsorted array with valid data.
vector< std::map< uInt64, AccumType > > _dataFromSingleBins(const vector< uInt64 > &binNpts, uInt maxArraySize, const vector< std::pair< AccumType, AccumType > > &binLimits, const vector< std::set< uInt64 > > &dataIndices, uInt64 nBins)
Base class of statistics algorithm class hierarchy.
this file contains all the compiler specific defines
virtual void _populateArrays(vector< vector< AccumType > > &arys, uInt &currentCount, const DataIterator &dataBegin, Int64 nr, uInt dataStride, const vector< std::pair< AccumType, AccumType > > &includeLimits, uInt maxCount) const
Create a vector of unsorted arrays, one array for each bin defined by includeLimits.
description of a regularly spaced bins with the first bin having lower limit of minLimit and having n...