/*
 * Dataset.hpp, part of LatAnalyze 3
 *
 * Copyright (C) 2013 - 2015 Antonin Portelli
 *
 * LatAnalyze 3 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * LatAnalyze 3 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LatAnalyze 3.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef Latan_Dataset_hpp_
#define Latan_Dataset_hpp_

#include <LatAnalyze/Global.hpp>
#include <LatAnalyze/File.hpp>
#include <LatAnalyze/StatArray.hpp>
#include <LatAnalyze/RandGen.hpp>
#include <fstream>
#include <vector>

BEGIN_LATAN_NAMESPACE

/******************************************************************************
 *                              Dataset class                                 *
 ******************************************************************************/
// Array of measurements of type T, publicly derived from StatArray<T>, with
// two additions: loading the elements from a list of data files and
// computing bootstrap resampled means.
template <typename T>
class Dataset: public StatArray<T>
{
public:
    // constructors
    Dataset(void) = default;
    // forwards size to the StatArray<T> constructor
    Dataset(const Index size);
    // macro defined elsewhere in the project; presumably it generates the
    // constructor/assignment from Eigen array expressions -- see its
    // definition to confirm
    EIGEN_EXPR_CTOR(Dataset, Dataset<T>, StatArray<T>, ArrayExpr)
    // destructor
    virtual ~Dataset(void) = default;
    // IO
    // Reads the list of data file names with readManifest(listFileName),
    // resizes the dataset to that number of files and stores in element i
    // the object named dataName read from file i through a FileType
    // instance (FileType must provide open, read<T> and close).
    template <typename FileType>
    void load(const std::string &listFileName, const std::string &dataName);
    // resampling
    // Returns a Sample<T> constructed with nSample: its 'central' element is
    // the mean over all elements of the dataset, and element i
    // (0 <= i < nSample) is the mean of size() elements picked through
    // generator.discreteUniform(size()) (assumed to return an index in
    // [0, size()) -- confirm against RandGen).
    Sample<T> bootstrapMean(const Index nSample, RandGen& generator);
private:
    // mean from pointer vector for resampling: stores in m the mean of the
    // objects pointed to by v; m is left untouched when v is empty
    void ptVectorMean(T &m, const std::vector<const T *> &v);
};

/******************************************************************************
 *                      Dataset template implementation                       *
 ******************************************************************************/
// constructors ////////////////////////////////////////////////////////////////
// Sized constructor: the requested number of elements is simply handed over
// to the StatArray<T> base class.
template <typename T>
Dataset<T>::Dataset(const Index size)
    : StatArray<T>(size)
{
}

// IO //////////////////////////////////////////////////////////////////////////
// Fill the dataset from a list of data files.
//   listFileName: manifest file, parsed by readManifest() into one data file
//                 name per dataset element
//   dataName    : name of the object to read from each data file
// The dataset is resized to the number of listed files; file i provides
// element i. Each file is opened in read mode and closed again before the
// next one is processed.
template <typename T>
template <typename FileType>
void Dataset<T>::load(const std::string &listFileName,
                      const std::string &dataName)
{
    FileType                 reader;
    std::vector<std::string> manifest = readManifest(listFileName);
    Index                    slot     = 0;

    this->resize(manifest.size());
    for (std::string &path : manifest)
    {
        reader.open(path, File::Mode::read);
        (*this)[slot] = reader.template read<T>(dataName);
        reader.close();
        ++slot;
    }
}

// resampling //////////////////////////////////////////////////////////////////
// Bootstrap resampling of the mean.
//   nSample  : number of resampled means to produce
//   generator: random generator used to pick the elements
// Returns a Sample<T> built with nSample whose 'central' element holds the
// mean over the whole dataset and whose element i holds the mean of size()
// elements, each selected by an independent call to
// generator.discreteUniform(size()).
template <typename T>
Sample<T> Dataset<T>::bootstrapMean(const Index nSample, RandGen& generator)
{
    typedef std::vector<const T *>       PtVector;
    typedef typename PtVector::size_type size_type;

    const size_type nData = static_cast<size_type>(this->size());
    PtVector        pick(nData);
    Sample<T>       result(nSample);
    Index           pos = 0;

    // central value: every element of the dataset exactly once, in order
    for (const T *&slot : pick)
    {
        slot = &((*this)[pos]);
        ++pos;
    }
    ptVectorMean(result[central], pick);

    // resampled values: one random draw per slot, slots filled in order
    for (Index sample = 0; sample < nSample; ++sample)
    {
        for (const T *&slot : pick)
        {
            const unsigned int range = static_cast<unsigned int>(nData);
            const Index        drawn =
                static_cast<Index>(generator.discreteUniform(range));

            slot = &((*this)[drawn]);
        }
        ptVectorMean(result[sample], pick);
    }

    return result;
}

// Mean of the objects referenced by a vector of pointers.
//   m: output, receives the sum of all pointed objects divided by their
//      number (as a double); left untouched when v is empty
//   v: pointers to the objects to average; every pointer is dereferenced
// T needs to support copy assignment, operator+= with another T and
// operator/= with a double.
template <typename T>
void Dataset<T>::ptVectorMean(T &m, const std::vector<const T *> &v)
{
    // index with the vector's own size type: a narrower counter (e.g.
    // unsigned int where size_t is 64-bit) would wrap around before reaching
    // v.size() on very large inputs and the loop would never terminate
    typedef typename std::vector<const T *>::size_type size_type;

    const size_type nElem = v.size();

    if (nElem == 0)
    {
        return;
    }
    // start from a copy of the first element rather than from a
    // default-constructed T, so that m directly gets the right shape for
    // resizable types
    m = *(v[0]);
    for (size_type i = 1; i < nElem; ++i)
    {
        m += *(v[i]);
    }
    m /= static_cast<double>(nElem);
}

END_LATAN_NAMESPACE

#endif // Latan_Dataset_hpp_