diff --git a/examples/Makefile.am b/examples/Makefile.am index 123bafb..8a51549 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -20,7 +20,8 @@ noinst_PROGRAMS = \ exPValue \ exRand \ exRootFinder \ - exThreadPool + exThreadPool \ + exVarBenchmark exCompiledDoubleFunction_SOURCES = exCompiledDoubleFunction.cpp exCompiledDoubleFunction_CXXFLAGS = $(COM_CXXFLAGS) @@ -78,4 +79,8 @@ exThreadPool_SOURCES = exThreadPool.cpp exThreadPool_CXXFLAGS = $(COM_CXXFLAGS) exThreadPool_LDFLAGS = -L../lib/.libs -lLatAnalyze +exVarBenchmark_SOURCES = exVarBenchmark.cpp +exVarBenchmark_CXXFLAGS = $(COM_CXXFLAGS) +exVarBenchmark_LDFLAGS = -L../lib/.libs -lLatAnalyze + ACLOCAL_AMFLAGS = -I .buildutils/m4 diff --git a/examples/exVarBenchmark.cpp b/examples/exVarBenchmark.cpp new file mode 100644 index 0000000..4c0ea9c --- /dev/null +++ b/examples/exVarBenchmark.cpp @@ -0,0 +1,119 @@ +#include +#include +#include +#include +#include + +using namespace std; +using namespace Latan; + +constexpr Index size = 1000; +constexpr Index nSample = 2000; + +int main(void) +{ + random_device rd; + DMat var(size, size); + DVec mean(size); + DMatSample sample(nSample, size, 1), sample2(nSample, size, 1); + + cout << "-- generating " << nSample << " Gaussian random vectors..." << endl; + var = DMat::Random(size, size); + var *= var.adjoint(); + mean = DVec::Random(size); + RandomNormal mgauss(mean, var, rd()); + sample[central] = mgauss(); + FOR_STAT_ARRAY(sample, s) + { + sample[s] = mgauss(); + } + sample2[central] = mgauss(); + FOR_STAT_ARRAY(sample, s) + { + sample2[s] = mgauss(); + } + + cout << "-- check new routines" << endl; + + DMat v, vo; + + cout << "var" << endl; + auto start = chrono::high_resolution_clock::now(); + vo = sample.varianceOld(); + auto end = chrono::high_resolution_clock::now(); + chrono::duration diff = end - start; + cout << "time " << diff.count() << endl; + start = chrono::high_resolution_clock::now(); + v = sample.variance(); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + cout << "diff " << (v - vo).norm() << endl; + + cout << "cov" << endl; + start = chrono::high_resolution_clock::now(); + vo = sample.covarianceOld(sample2); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + start = chrono::high_resolution_clock::now(); + v = sample.covariance(sample2); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + cout << "diff " << (v - vo).norm() << endl; + + cout << "mean" << endl; + start = chrono::high_resolution_clock::now(); + vo = sample.meanOld(3, 5); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + start = chrono::high_resolution_clock::now(); + v = sample.mean(3, 5); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + cout << "diff " << (v - vo).norm() << endl; + + cout << "varmat" << endl; + start = chrono::high_resolution_clock::now(); + vo = sample.varianceMatrixOld(); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + start = chrono::high_resolution_clock::now(); + v = sample.varianceMatrix(); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + cout << "diff " << (v - vo).norm() << endl; + + cout << "covarmat" << endl; + start = chrono::high_resolution_clock::now(); + vo = sample.covarianceMatrixOld(sample2); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + start = chrono::high_resolution_clock::now(); + v = sample.covarianceMatrix(sample2); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + cout << "diff " << (v - vo).norm() << endl; + + cout << "corrmat" << endl; + start = chrono::high_resolution_clock::now(); + vo = sample.correlationMatrixOld(); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + start = chrono::high_resolution_clock::now(); + v = sample.correlationMatrix(); + end = chrono::high_resolution_clock::now(); + diff = end - start; + cout << "time " << diff.count() << endl; + cout << "diff " << (v - vo).norm() << endl; + + return EXIT_SUCCESS; +} diff --git a/lib/Statistics/MatSample.hpp b/lib/Statistics/MatSample.hpp index a48db9d..a103215 100644 --- a/lib/Statistics/MatSample.hpp +++ b/lib/Statistics/MatSample.hpp @@ -103,6 +103,10 @@ public: const Index nCol); // resize all matrices void resizeMat(const Index nRow, const Index nCol); + // covariance matrix + Mat covarianceMatrix(const MatSample &sample) const; + Mat varianceMatrix(void) const; + Mat correlationMatrix(void) const; }; // non-member operators @@ -379,6 +383,78 @@ void MatSample::resizeMat(const Index nRow, const Index nCol) } } +// covariance matrix /////////////////////////////////////////////////////////// +template +Mat MatSample::covarianceMatrix(const MatSample &sample) const +{ + if (((*this)[central].cols() != 1) or (sample[central].cols() != 1)) + { + LATAN_ERROR(Size, "samples have more than one column"); + } + + Index n1 = (*this)[central].rows(), n2 = sample[central].rows(); + Index nSample = this->size(); + Mat tmp1(n1, nSample), tmp2(n2, nSample), res(n1, n2); + Mat s1(n1, 1), s2(n2, 1), one(nSample, 1); + + one.fill(1.); + s1.fill(0.); + s2.fill(0.); + for (unsigned int s = 0; s < nSample; ++s) + { + s1 += (*this)[s]; + tmp1.col(s) = (*this)[s]; + } + tmp1 -= s1*one.transpose()/static_cast(nSample); + for (unsigned int s = 0; s < nSample; ++s) + { + s2 += sample[s]; + tmp2.col(s) = sample[s]; + } + tmp2 -= s2*one.transpose()/static_cast(nSample); + res = tmp1*tmp2.transpose()/static_cast(nSample - 1); + + return res; +} + +template +Mat MatSample::varianceMatrix(void) const +{ + if ((*this)[central].cols() != 1) + { + LATAN_ERROR(Size, "samples have more than one column"); + } + + Index n1 = (*this)[central].rows(); + Index nSample = this->size(); + Mat tmp1(n1, nSample), res(n1, n1); + Mat s1(n1, 1), one(nSample, 1); + + one.fill(1.); + s1.fill(0.); + for (unsigned int s = 0; s < nSample; ++s) + { + s1 += (*this)[s]; + tmp1.col(s) = (*this)[s]; + } + tmp1 -= s1*one.transpose()/static_cast(nSample); + res = tmp1*tmp1.transpose()/static_cast(nSample - 1); + + return res; +} + +template +Mat MatSample::correlationMatrix(void) const +{ + Mat res = varianceMatrix(); + Mat invDiag(res.rows(), 1); + + invDiag = res.diagonal(); + invDiag = invDiag.cwiseInverse().cwiseSqrt(); + res = (invDiag*invDiag.transpose()).cwiseProduct(res); + + return res; +} END_LATAN_NAMESPACE diff --git a/lib/Statistics/StatArray.hpp b/lib/Statistics/StatArray.hpp index 9637089..15179e1 100644 --- a/lib/Statistics/StatArray.hpp +++ b/lib/Statistics/StatArray.hpp @@ -51,14 +51,17 @@ public: const T & operator[](const Index s) const; // statistics void bin(Index binSize); + T sum(const Index pos = 0, const Index n = -1) const; + T meanOld(const Index pos = 0, const Index n = -1) const; T mean(const Index pos = 0, const Index n = -1) const; - T covariance(const StatArray &array, const Index pos = 0, - const Index n = -1) const; - T covarianceMatrix(const StatArray &array, const Index pos = 0, + T covarianceOld(const StatArray &array) const; + T covariance(const StatArray &array) const; + T covarianceMatrixOld(const StatArray &array, const Index pos = 0, const Index n = -1) const; - T variance(const Index pos = 0, const Index n = -1) const; - T varianceMatrix(const Index pos = 0, const Index n = -1) const; - T correlationMatrix(const Index pos = 0, const Index n = -1) const; + T varianceOld(void) const; + T variance(void) const; + T varianceMatrixOld(const Index pos = 0, const Index n = -1) const; + T correlationMatrixOld(const Index pos = 0, const Index n = -1) const; // IO type virtual IoType getType(void) const; public: @@ -66,7 +69,7 @@ public: }; // reduction operations -namespace ReducOp +namespace StatOp { // general templates template @@ -149,42 +152,82 @@ void StatArray::bin(Index binSize) } template -T StatArray::mean(const Index pos, const Index n) const +T StatArray::meanOld(const Index pos, const Index n) const { T result = T(); const Index m = (n >= 0) ? n : size(); if (m) { - result = this->segment(pos+os, m).redux(&ReducOp::sum); + result = this->segment(pos+os, m).redux(&StatOp::sum); } return result/static_cast(m); } template -T StatArray::covariance(const StatArray &array, const Index pos, - const Index n) const +T StatArray::sum(const Index pos, const Index n) const +{ + T result; + const Index m = (n >= 0) ? n : size(); + + result = (*this)[pos]; + for (Index i = pos + 1; i < pos + m; ++i) + { + result += (*this)[i]; + } + + return result; +} + +template +T StatArray::mean(const Index pos, const Index n) const +{ + const Index m = (n >= 0) ? n : size(); + + return sum(pos, n)/static_cast(m); +} + +template +T StatArray::covarianceOld(const StatArray &array) const { T s1, s2, prs, res = T(); - const Index m = (n >= 0) ? n : size(); + const Index m = size(); if (m) { - auto arraySeg = array.segment(pos+os, m); - auto thisSeg = this->segment(pos+os, m); + auto arraySeg = array.segment(os, m); + auto thisSeg = this->segment(os, m); - s1 = thisSeg.redux(&ReducOp::sum); - s2 = arraySeg.redux(&ReducOp::sum); - prs = thisSeg.binaryExpr(arraySeg, &ReducOp::prod) - .redux(&ReducOp::sum); - res = prs - ReducOp::prod(s1, s2)/static_cast(m); + s1 = thisSeg.redux(&StatOp::sum); + s2 = arraySeg.redux(&StatOp::sum); + prs = thisSeg.binaryExpr(arraySeg, &StatOp::prod) + .redux(&StatOp::sum); + res = prs - StatOp::prod(s1, s2)/static_cast(m); } return res/static_cast(m - 1); } template -T StatArray::covarianceMatrix(const StatArray &array, +T StatArray::covariance(const StatArray &array) const +{ + T s1, s2, res; + + s1 = array.sum(); + s2 = this->sum(); + res = StatOp::prod(array[0], (*this)[0]); + for (Index i = 1; i < size(); ++i) + { + res += StatOp::prod(array[i], (*this)[i]); + } + res -= StatOp::prod(s1, s2)/static_cast(size()); + res /= static_cast(size() - 1); + + return res; +} + +template +T StatArray::covarianceMatrixOld(const StatArray &array, const Index pos, const Index n) const { T s1, s2, prs, res = T(); @@ -195,32 +238,38 @@ T StatArray::covarianceMatrix(const StatArray &array, auto arraySeg = array.segment(pos+os, m); auto thisSeg = this->segment(pos+os, m); - s1 = thisSeg.redux(&ReducOp::sum); - s2 = arraySeg.redux(&ReducOp::sum); - prs = thisSeg.binaryExpr(arraySeg, &ReducOp::tensProd) - .redux(&ReducOp::sum); - res = prs - ReducOp::tensProd(s1, s2)/static_cast(m); + s1 = thisSeg.redux(&StatOp::sum); + s2 = arraySeg.redux(&StatOp::sum); + prs = thisSeg.binaryExpr(arraySeg, &StatOp::tensProd) + .redux(&StatOp::sum); + res = prs - StatOp::tensProd(s1, s2)/static_cast(m); } return res/static_cast(m - 1); } template -T StatArray::variance(const Index pos, const Index n) const +T StatArray::variance(void) const { - return covariance(*this, pos, n); + return covariance(*this); } template -T StatArray::varianceMatrix(const Index pos, const Index n) const +T StatArray::varianceOld(void) const { - return covarianceMatrix(*this, pos, n); + return covarianceOld(*this); } template -T StatArray::correlationMatrix(const Index pos, const Index n) const +T StatArray::varianceMatrixOld(const Index pos, const Index n) const { - T res = varianceMatrix(pos, n); + return covarianceMatrixOld(*this, pos, n); +} + +template +T StatArray::correlationMatrixOld(const Index pos, const Index n) const +{ + T res = varianceMatrixOld(pos, n); T invDiag(res.rows(), 1); invDiag = res.diagonal(); @@ -231,8 +280,14 @@ T StatArray::correlationMatrix(const Index pos, const Index n) const } // reduction operations //////////////////////////////////////////////////////// -namespace ReducOp +namespace StatOp { + template + inline void zero(T &a) + { + a = 0.; + } + template inline T sum(const T &a, const T &b) {