From f9e355e3ea6f48846970ea4d26b27ab28db116b4 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Wed, 12 Feb 2014 18:39:17 +0000 Subject: [PATCH] first implementation of samples and datasets, still need IO for samples --- latan/Dataset.hpp | 155 ++++++++++++++++++++++++++++++++++++++++++++ latan/IoObject.hpp | 8 +-- latan/Makefile.am | 4 +- latan/Sample.cpp | 27 ++------ latan/Sample.hpp | 94 +++++++++++++++++++++++---- latan/StatArray.hpp | 141 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 390 insertions(+), 39 deletions(-) create mode 100644 latan/Dataset.hpp create mode 100644 latan/StatArray.hpp diff --git a/latan/Dataset.hpp b/latan/Dataset.hpp new file mode 100644 index 0000000..d712270 --- /dev/null +++ b/latan/Dataset.hpp @@ -0,0 +1,155 @@ +/* + * Dataset.hpp, part of LatAnalyze 3 + * + * Copyright (C) 2013 - 2014 Antonin Portelli + * + * LatAnalyze 3 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * LatAnalyze 3 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with LatAnalyze 3. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef Latan_Dataset_hpp_ +#define Latan_Dataset_hpp_ + +#include +#include +#include +#include +#include +#include + +BEGIN_NAMESPACE + +/****************************************************************************** + * Dataset class * + ******************************************************************************/ +template +class Dataset: public StatArray +{ +private: + typedef StatArray Base; +public: + // constructor + Dataset(void); + Dataset(const std::string &listFileName, const std::string &dataName); + template + Dataset(const Eigen::EigenBase &dataset); + // destructor + virtual ~Dataset(void); + // IO + void load(const std::string &listFileName, const std::string &dataName); + // resampling + Sample bootstrapMean(const unsigned int nSample, RandGen& generator); +private: + // mean from pointer vector for resampling + void ptVectorMean(T &m, const std::vector &v); +private: + FileType file_; +}; + +/****************************************************************************** + * Dataset template implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +Dataset::Dataset(void) +{} + +template +Dataset::Dataset(const std::string &listFileName, + const std::string &dataName) +{ + load(listFileName, dataName); +} + +template +template +Dataset::Dataset(const Eigen::EigenBase &dataset) +: Base(dataset) +{} + +// destructor ////////////////////////////////////////////////////////////////// +template +Dataset::~Dataset(void) +{} + +// IO ////////////////////////////////////////////////////////////////////////// +template +void Dataset::load(const std::string &listFileName, + const std::string &dataName) +{ + std::ifstream listFile; + char dataFileNameBuf[MAX_PATH_LENGTH]; + std::vector dataFileName; + + listFile.open(listFileName, std::ios::in); + while (!listFile.eof()) + { + listFile.getline(dataFileNameBuf, 
MAX_PATH_LENGTH); + if (!std::string(dataFileNameBuf).empty()) + { + dataFileName.push_back(dataFileNameBuf); + } + } + listFile.close(); + this->resize(dataFileName.size()); + for (unsigned int i = 0; i < dataFileName.size(); ++i) + { + file_.open(dataFileName[i], File::Mode::read); + (*this)[i] = file_.template read(dataName); + file_.close(); + } +} + +// resampling ////////////////////////////////////////////////////////////////// +template +Sample Dataset::bootstrapMean(const unsigned int nSample, + RandGen& generator) +{ + unsigned int nData = this->size(); + std::vector data(nData); + Sample s(nSample); + + for (unsigned int j = 0; j < nData; ++j) + { + data[j] = &((*this)[j]); + } + ptVectorMean(s[central], data); + for (unsigned int i = 0; i < nSample; ++i) + { + for (unsigned int j = 0; j < nData; ++j) + { + data[j] = &((*this)[generator.discreteUniform(nData)]); + } + ptVectorMean(s[i], data); + } + + return s; +} + +template +void Dataset::ptVectorMean(T &m, const std::vector &v) +{ + if (v.size()) + { + m = *(v[0]); + for (unsigned int i = 1; i < v.size(); ++i) + { + m += *(v[i]); + } + m /= static_cast(v.size()); + } +} + +END_NAMESPACE + +#endif // Latan_Dataset_hpp_ diff --git a/latan/IoObject.hpp b/latan/IoObject.hpp index 252a1a2..9ea1c13 100644 --- a/latan/IoObject.hpp +++ b/latan/IoObject.hpp @@ -33,10 +33,10 @@ public: public: enum { - noType = 0, - dMat = 1, - sample = 2, - rgState = 3 + noType = 0, + dMat = 1, + dMatSample = 2, + rgState = 3 }; }; public: diff --git a/latan/Makefile.am b/latan/Makefile.am index 6520e90..58f61c5 100644 --- a/latan/Makefile.am +++ b/latan/Makefile.am @@ -45,6 +45,7 @@ liblatan_la_SOURCES = \ liblatan_ladir = $(pkgincludedir) liblatan_la_HEADERS = \ CompiledFunction.hpp\ + Dataset.hpp \ Function.hpp \ Global.hpp \ Io.hpp \ @@ -55,7 +56,8 @@ liblatan_la_HEADERS = \ ParserState.hpp \ Plot.hpp \ RandGen.hpp \ - Sample.hpp + Sample.hpp \ + StatArray.hpp liblatan_la_CFLAGS = $(COM_CFLAGS) liblatan_la_CXXFLAGS = 
$(COM_CXXFLAGS) diff --git a/latan/Sample.cpp b/latan/Sample.cpp index 75f0cea..2d1c9dd 100644 --- a/latan/Sample.cpp +++ b/latan/Sample.cpp @@ -21,29 +21,10 @@ #include using namespace Latan; +using namespace std; -DSample::DSample(void) -: DSampleBase(static_cast(0)) -{} - -DSample::DSample(const unsigned int nSample, const unsigned int nRow, - const unsigned int nCol) -: DSampleBase(static_cast(nSample + 1)) +template <> +unsigned int Sample::getType(void) const { - for (int s = 0; s < size(); ++s) - { - (*this)(s).resize(nRow, nCol); - } -} - -DMat& DSample::operator()(const int s) -{ - if (s >= 0) - { - return (*this)(s + 1); - } - else - { - return (*this)(0); - } + return IoType::dMatSample; } diff --git a/latan/Sample.hpp b/latan/Sample.hpp index 4ec3a47..228a1ce 100644 --- a/latan/Sample.hpp +++ b/latan/Sample.hpp @@ -21,26 +21,98 @@ #define Latan_Sample_hpp_ #include +#include #include +#include BEGIN_NAMESPACE -const int Central = -1; +const int central = -1; -typedef Eigen::Array DSampleBase; - -class DSample: public DSampleBase +/****************************************************************************** + * Sample class * + ******************************************************************************/ +template +class Sample: public StatArray, public IoObject { +private: + typedef StatArray Base; public: - // Constructors/destructor - DSample(void); - DSample(const unsigned int nSample, const unsigned int nRow, - const unsigned int nCol); - ~DSample(void); - // Operators - DMat& operator()(const int s); + // constructors + Sample(void); + Sample(const unsigned int nSample); + template + Sample(const Eigen::EigenBase &s); + // destructor + virtual ~Sample(void); + // operators + T& operator[](const int s); + // IO type + virtual unsigned int getType(void) const; +private: + // index of the first element to take into account for statistics + virtual unsigned int getOffset(void) const; }; +template <> +unsigned int Sample::getType(void) const; + +// 
specialization aliases +typedef Sample DMatSample; + +/****************************************************************************** + * Sample class template implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +Sample::Sample(void) +: Base(static_cast(getOffset())) +{} + +template +Sample::Sample(const unsigned int nSample) +: Base(static_cast(nSample + getOffset())) +{} + +template +template +Sample::Sample(const Eigen::EigenBase &s) +: Base(s) +{} + +// destructor ////////////////////////////////////////////////////////////////// +template +Sample::~Sample(void) +{} + +// operators /////////////////////////////////////////////////////////////////// +template +T& Sample::operator[](const int s) +{ + if (s >= 0) + { + return Base::operator[](s + 1); + } + else + { + return Base::operator[](0); + } +} + +// IO type ///////////////////////////////////////////////////////////////////// +template +unsigned int Sample::getType(void) const +{ + return IoType::noType; +} + +// statistics ////////////////////////////////////////////////////////////////// +template +unsigned int Sample::getOffset(void) const +{ + return 1u; +} + END_NAMESPACE #endif // Latan_Sample_hpp_ diff --git a/latan/StatArray.hpp b/latan/StatArray.hpp new file mode 100644 index 0000000..6dfa08a --- /dev/null +++ b/latan/StatArray.hpp @@ -0,0 +1,141 @@ +/* + * StatArray.hpp, part of LatAnalyze 3 + * + * Copyright (C) 2013 - 2014 Antonin Portelli + * + * LatAnalyze 3 is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * LatAnalyze 3 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with LatAnalyze 3. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef Latan_StatArray_hpp_ +#define Latan_StatArray_hpp_ + +#include +#include + +BEGIN_NAMESPACE + +/****************************************************************************** + * Array class with statistics * + ******************************************************************************/ +template +class StatArray: public Eigen::Array +{ +private: + typedef Eigen::Array Base; +public: + // constructors + StatArray(void); + StatArray(const unsigned int size); + template + StatArray(const Eigen::EigenBase &s); + // destructor + virtual ~StatArray(void); + // statistics + T mean(void) const; + T variance(void) const; +private: + // index of the first element to take into account for statistics + virtual unsigned int getOffset(void) const; + // operations for reduction in statistical computations + static inline T square(const T &a); + static inline T sum(const T &a, const T &b); +}; + +template <> +inline DMat StatArray::square(const DMat &a); + +/****************************************************************************** + * StatArray class template implementation * + ******************************************************************************/ +// constructors //////////////////////////////////////////////////////////////// +template +StatArray::StatArray(void) +: Base(static_cast(1)) +{} + +template +StatArray::StatArray(const unsigned int size) +: Base(static_cast(size)) +{} + +template +template +StatArray::StatArray(const Eigen::EigenBase &s) +: Base(s) +{} + +// destructor ////////////////////////////////////////////////////////////////// +template
+StatArray::~StatArray(void) +{} + +// statistics ////////////////////////////////////////////////////////////////// +template +T StatArray::mean(void) const +{ + T result; + unsigned int size = this->size() - getOffset(); + + if (size) + { + result = this->tail(size).redux(&StatArray::sum); + } + + return result/static_cast(size); +} + +template +T StatArray::variance(void) const +{ + T s, sqs, result; + unsigned int size = this->size() - getOffset(); + + if (size) + { + s = this->tail(size).redux(&StatArray::sum); + sqs = this->tail(size).unaryExpr(&StatArray::square) + .redux(&StatArray::sum); + result = sqs - square(s)/static_cast(size); + } + + return result/static_cast(size - 1); +} + +template +inline T StatArray::sum(const T &a, const T &b) +{ + return a + b; +} + +template +inline T StatArray::square(const T &a) +{ + return a*a; +} + +template <> +inline DMat StatArray::square(const DMat &a) +{ + return a.cwiseProduct(a); +} + +template +unsigned int StatArray::getOffset(void) const +{ + return 0u; +} + +END_NAMESPACE + +#endif // Latan_StatArray_hpp_