mirror of
https://github.com/aportelli/LatAnalyze.git
synced 2024-11-14 01:45:35 +00:00
229 lines
6.0 KiB
C++
229 lines
6.0 KiB
C++
/*
|
|
* Histogram.cpp, part of LatAnalyze 3
|
|
*
|
|
* Copyright (C) 2013 - 2016 Antonin Portelli
|
|
*
|
|
* LatAnalyze 3 is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* LatAnalyze 3 is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with LatAnalyze 3. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <LatAnalyze/Statistics/Histogram.hpp>
|
|
#include <LatAnalyze/includes.hpp>
|
|
#include <gsl/gsl_histogram.h>
|
|
#include <gsl/gsl_sf.h>
|
|
#include <gsl/gsl_sort.h>
|
|
|
|
using namespace std;
|
|
using namespace Latan;
|
|
|
|
#define DECL_GSL_HIST(h) \
|
|
gsl_histogram h{static_cast<size_t>(bin_.size()), x_.data(), bin_.data()}
|
|
#define DECL_CONST_GSL_HIST(h) \
|
|
const gsl_histogram h{static_cast<size_t>(bin_.size()),\
|
|
const_cast<double *>(x_.data()),\
|
|
const_cast<double *>(bin_.data())}
|
|
|
|
/******************************************************************************
|
|
* Histogram implementation *
|
|
******************************************************************************/
|
|
// constructor /////////////////////////////////////////////////////////////////
|
|
Histogram::Histogram(const DVec &data, const double xMin, const double xMax,
|
|
const Index nBin)
|
|
: Histogram()
|
|
{
|
|
setFromData(data, xMin, xMax, nBin);
|
|
}
|
|
|
|
Histogram::Histogram(const DVec &data, const DVec &w, const double xMin,
|
|
const double xMax, const Index nBin)
|
|
: Histogram()
|
|
{
|
|
setFromData(data, w, xMin, xMax, nBin);
|
|
}
|
|
|
|
// resize //////////////////////////////////////////////////////////////////////
|
|
void Histogram::resize(const Index nBin)
|
|
{
|
|
x_.resize(nBin + 1);
|
|
bin_.resize(nBin);
|
|
}
|
|
|
|
// generate from data //////////////////////////////////////////////////////////
|
|
void Histogram::setFromData(const DVec &data, const DVec &w, const double xMin,
|
|
const double xMax, const Index nBin)
|
|
{
|
|
if (data.size() != w.size())
|
|
{
|
|
LATAN_ERROR(Size, "data vector and weight vector size mismatch");
|
|
}
|
|
resize(nBin);
|
|
data_ = data.array();
|
|
w_ = w.array();
|
|
xMax_ = xMax;
|
|
xMin_ = xMin;
|
|
makeHistogram();
|
|
}
|
|
|
|
void Histogram::setFromData(const DVec &data, const double xMin,
|
|
const double xMax, const Index nBin)
|
|
{
|
|
resize(nBin);
|
|
data_ = data.array();
|
|
xMax_ = xMax;
|
|
xMin_ = xMin;
|
|
w_.resize(data.size());
|
|
w_.fill(1.);
|
|
makeHistogram();
|
|
}
|
|
|
|
// histogram calculation ///////////////////////////////////////////////////////
|
|
void Histogram::makeHistogram(void)
|
|
{
|
|
DECL_GSL_HIST(h);
|
|
|
|
gsl_histogram_set_ranges_uniform(&h, xMin_, xMax_);
|
|
FOR_STAT_ARRAY(data_, i)
|
|
{
|
|
gsl_histogram_accumulate(&h, data_[i], w_[i]);
|
|
}
|
|
total_ = w_.sum();
|
|
sortIndices();
|
|
computeNorm();
|
|
}
|
|
|
|
// generate sorted indices /////////////////////////////////////////////////////
|
|
void Histogram::sortIndices(void)
|
|
{
|
|
sInd_.resize(data_.size());
|
|
gsl_sort_index(sInd_.data(), data_.data(), 1, data_.size());
|
|
}
|
|
|
|
// compute normalization factor ////////////////////////////////////////////////
|
|
void Histogram::computeNorm(void)
|
|
{
|
|
norm_ = static_cast<double>(bin_.size())/(total_*(xMax_ - xMin_));
|
|
}
|
|
|
|
// normalize as a probablility /////////////////////////////////////////////////
|
|
void Histogram::normalize(const bool n)
|
|
{
|
|
normalize_ = n;
|
|
}
|
|
|
|
bool Histogram::isNormalized(void) const
|
|
{
|
|
return normalize_;
|
|
}
|
|
|
|
// access //////////////////////////////////////////////////////////////////////
|
|
Index Histogram::size(void) const
|
|
{
|
|
return bin_.size();
|
|
}
|
|
|
|
const StatArray<double> & Histogram::getData(void) const
|
|
{
|
|
return data_;
|
|
}
|
|
|
|
const StatArray<double> & Histogram::getWeight(void) const
|
|
{
|
|
return w_;
|
|
}
|
|
|
|
double Histogram::getX(const Index i) const
|
|
{
|
|
return x_(i);
|
|
}
|
|
|
|
double Histogram::operator[](const Index i) const
|
|
{
|
|
return bin_(i)*(isNormalized() ? norm_ : 1.);
|
|
}
|
|
|
|
double Histogram::operator()(const double x) const
|
|
{
|
|
size_t i;
|
|
|
|
DECL_CONST_GSL_HIST(h);
|
|
|
|
gsl_histogram_find(&h, x, &i);
|
|
|
|
return (*this)[static_cast<Index>(i)];
|
|
}
|
|
|
|
// percentiles & confidence interval ///////////////////////////////////////////
|
|
double Histogram::percentile(const double p) const
|
|
{
|
|
if ((p < 0.0) or (p > 100.0))
|
|
{
|
|
LATAN_ERROR(Range, "percentile (" + strFrom(p) + ")"
|
|
" is outside the [0, 100] range");
|
|
}
|
|
|
|
// cf. http://en.wikipedia.org/wiki/Percentile
|
|
double wPSum, p_i, p_im1, w_i, res = 0.;
|
|
bool haveResult;
|
|
|
|
wPSum = w_[sInd_[0]];
|
|
p_i = (100./total_)*wPSum*0.5;
|
|
if (p < p_i)
|
|
{
|
|
res = data_[sInd_[0]];
|
|
}
|
|
else
|
|
{
|
|
haveResult = false;
|
|
p_im1 = p_i;
|
|
for (Index i = 1; i < data_.size(); ++i)
|
|
{
|
|
w_i = w_[sInd_[i]];
|
|
wPSum += w_i;
|
|
p_i = (100./total_)*(wPSum-0.5*w_i);
|
|
if ((p >= p_im1) and (p < p_i))
|
|
{
|
|
double d_i = data_[sInd_[i]], d_im1 = data_[sInd_[i-1]];
|
|
|
|
res = d_im1 + (p-p_im1)/(p_i-p_im1)*(d_i-d_im1);
|
|
haveResult = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!haveResult)
|
|
{
|
|
res = data_[sInd_[data_.size()-1]];
|
|
}
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
double Histogram::median(void) const
|
|
{
|
|
return percentile(50.);
|
|
}
|
|
|
|
pair<double, double> Histogram::confidenceInterval(const double nSigma) const
|
|
{
|
|
pair<double, double> interval, p;
|
|
double cl;
|
|
|
|
cl = gsl_sf_erf(nSigma/sqrt(2.));
|
|
p.first = 50.*(1. - cl);
|
|
p.second = 50.*(1. + cl);
|
|
interval.first = percentile(p.first);
|
|
interval.second = percentile(p.second);
|
|
|
|
return interval;
|
|
}
|