/* * Dataset.hpp, part of LatAnalyze 3 * * Copyright (C) 2013 - 2020 Antonin Portelli * * LatAnalyze 3 is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * LatAnalyze 3 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with LatAnalyze 3. If not, see . */ #ifndef Latan_Dataset_hpp_ #define Latan_Dataset_hpp_ #include #include #include BEGIN_LATAN_NAMESPACE /****************************************************************************** * Dataset class * ******************************************************************************/ template class Dataset: public StatArray { public: typedef std::random_device::result_type SeedType; public: // constructors Dataset(void) = default; Dataset(const Index size); EIGEN_EXPR_CTOR(Dataset, Dataset, StatArray, ArrayExpr) // destructor virtual ~Dataset(void) = default; // IO template void load(const std::string &listFileName, const std::string &dataName); // resampling Sample bootstrapMean(const Index nSample, const SeedType seed); Sample bootstrapMean(const Index nSample); void dumpBootstrapSeq(std::ostream &out, const Index nSample, const SeedType seed); private: // mean from pointer vector for resampling void ptVectorMean(T &m, const std::vector &v); }; /****************************************************************************** * Dataset template implementation * ******************************************************************************/ // constructors //////////////////////////////////////////////////////////////// template Dataset::Dataset(const Index size) : StatArray(size) {} // IO ////////////////////////////////////////////////////////////////////////// template template void Dataset::load(const std::string &listFileName, const std::string &dataName) { FileType file; std::vector dataFileName; dataFileName = readManifest(listFileName); this->resize(dataFileName.size()); for (Index i = 0; i < static_cast(dataFileName.size()); ++i) { file.open(dataFileName[i], File::Mode::read); (*this)[i] = file.template read(dataName); file.close(); } } // resampling ////////////////////////////////////////////////////////////////// template Sample Dataset::bootstrapMean(const Index nSample, const SeedType seed) { std::vector data(this->size()); Sample s(nSample); std::mt19937 gen(seed); std::uniform_int_distribution dis(0, this->size() - 1); for (unsigned int j = 0; j < this->size(); ++j) { data[j] = &((*this)[static_cast(j)]); } ptVectorMean(s[central], data); for (Index i = 0; i < nSample; ++i) { for (unsigned int j = 0; j < this->size(); ++j) { data[j] = &((*this)[dis(gen)]); } ptVectorMean(s[i], data); } return s; } template Sample Dataset::bootstrapMean(const Index nSample) { std::random_device rd; return bootstrapMean(nSample, rd()); } template void Dataset::dumpBootstrapSeq(std::ostream &out, const Index nSample, const SeedType seed) { std::mt19937 gen(seed); std::uniform_int_distribution dis(0, this->size() - 1); for (Index i = 0; i < nSample; ++i) { for (unsigned int j = 0; j < this->size(); ++j) { out << dis(gen) << " " << std::endl; } out << std::endl; } } template void Dataset::ptVectorMean(T &m, const std::vector &v) { if (v.size()) { m = *(v[0]); for (unsigned int i = 1; i < v.size(); ++i) { m += *(v[i]); } m /= static_cast(v.size()); } } END_LATAN_NAMESPACE #endif // Latan_Dataset_hpp_