/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: Hadrons/DiskVector.hpp Copyright (C) 2015-2018 Author: Antonin Portelli This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef Hadrons_DiskVector_hpp_ #define Hadrons_DiskVector_hpp_ #include #include #include #include #include #include #ifdef DV_DEBUG #define DV_DEBUG_MSG(dv, stream) LOG(Debug) << "diskvector " << (dv) << ": " << stream << std::endl #else #define DV_DEBUG_MSG(dv, stream) #endif BEGIN_HADRONS_NAMESPACE /****************************************************************************** * Abstract base class * ******************************************************************************/ template class DiskVectorBase { public: typedef T ObjectType; // helper for read/write vector access class RwAccessHelper { public: RwAccessHelper(DiskVectorBase &master, const unsigned int i) : master_(master), cmaster_(master), i_(i) {} // operator=: somebody is trying to store a vector element // write to cache and tag as modified T &operator=(const T &obj) const { auto &cache = *master_.cachePtr_; auto &modified = *master_.modifiedPtr_; auto &index = *master_.indexPtr_; DV_DEBUG_MSG(&master_, "writing to " << i_); master_.cacheInsert(i_, obj); modified[index.at(i_)] = true; return cache[index.at(i_)]; } // implicit cast to const object reference and redirection // to the const operator[] for read-only operations operator const T&() const { return cmaster_[i_]; } private: DiskVectorBase &master_; const DiskVectorBase &cmaster_; const unsigned int i_; }; public: DiskVectorBase(const std::string dirname, const unsigned int size = 0, const unsigned int cacheSize = 1, const bool clean = true); DiskVectorBase(DiskVectorBase &&v) = default; virtual ~DiskVectorBase(void); const T & operator[](const unsigned int i) const; RwAccessHelper operator[](const unsigned int i); double hitRatio(void) const; void resetStat(void); private: virtual void load(T &obj, const std::string filename) const = 0; virtual void save(const std::string filename, const T &obj) const = 0; virtual std::string filename(const unsigned int i) const; void evict(void) const; void fetch(const unsigned int i) const; void cacheInsert(const unsigned int i, const T &obj) const; void clean(void); private: std::string dirname_; unsigned int size_, cacheSize_; double access_{0.}, hit_{0.}; bool clean_; // using pointers to allow modifications when class is const // semantic: const means data unmodified, but cache modification allowed std::unique_ptr> cachePtr_; std::unique_ptr> modifiedPtr_; std::unique_ptr> indexPtr_; std::unique_ptr> freePtr_; std::unique_ptr> loadsPtr_; }; /****************************************************************************** * Specialisation for serialisable classes * ******************************************************************************/ template class SerializableDiskVector: public DiskVectorBase { public: using DiskVectorBase::DiskVectorBase; private: virtual void load(T &obj, const std::string filename) const { Reader reader(filename); read(reader, basename(filename), obj); } virtual void save(const std::string filename, const T &obj) const { Writer writer(filename); write(writer, basename(filename), obj); } }; /****************************************************************************** * Specialisation for Eigen matrices * ******************************************************************************/ template using EigenDiskVectorMat = A2AMatrix; template class EigenDiskVector: public DiskVectorBase> { public: using DiskVectorBase>::DiskVectorBase; typedef EigenDiskVectorMat Matrix; public: T operator()(const unsigned int i, const Eigen::Index j, const Eigen::Index k) const { return (*this)[i](j, k); } private: virtual void load(EigenDiskVectorMat &obj, const std::string filename) const { std::ifstream f(filename, std::ios::binary); uint32_t crc, check; Eigen::Index nRow, nCol; size_t matSize; double tRead, tHash; f.read(reinterpret_cast(&crc), sizeof(crc)); f.read(reinterpret_cast(&nRow), sizeof(nRow)); f.read(reinterpret_cast(&nCol), sizeof(nCol)); obj.resize(nRow, nCol); matSize = nRow*nCol*sizeof(T); tRead = -usecond(); f.read(reinterpret_cast(obj.data()), matSize); tRead += usecond(); tHash = -usecond(); #ifdef USE_IPP check = GridChecksum::crc32c(obj.data(), matSize); #else check = GridChecksum::crc32(obj.data(), matSize); #endif tHash += usecond(); DV_DEBUG_MSG(this, "Eigen read " << tRead/1.0e6 << " sec " << matSize/tRead*1.0e6/1024/1024 << " MB/s"); DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s"); if (crc != check) { HADRONS_ERROR(Io, "checksum failed") } } virtual void save(const std::string filename, const EigenDiskVectorMat &obj) const { std::ofstream f(filename, std::ios::binary); uint32_t crc; Eigen::Index nRow, nCol; size_t matSize; double tWrite, tHash; nRow = obj.rows(); nCol = obj.cols(); matSize = nRow*nCol*sizeof(T); tHash = -usecond(); #ifdef USE_IPP crc = GridChecksum::crc32c(obj.data(), matSize); #else crc = GridChecksum::crc32(obj.data(), matSize); #endif tHash += usecond(); f.write(reinterpret_cast(&crc), sizeof(crc)); f.write(reinterpret_cast(&nRow), sizeof(nRow)); f.write(reinterpret_cast(&nCol), sizeof(nCol)); tWrite = -usecond(); f.write(reinterpret_cast(obj.data()), matSize); tWrite += usecond(); DV_DEBUG_MSG(this, "Eigen write " << tWrite/1.0e6 << " sec " << matSize/tWrite*1.0e6/1024/1024 << " MB/s"); DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec << " " << tHash/1.0e6 << " sec " << matSize/tHash*1.0e6/1024/1024 << " MB/s"); } }; /****************************************************************************** * DiskVectorBase implementation * ******************************************************************************/ template DiskVectorBase::DiskVectorBase(const std::string dirname, const unsigned int size, const unsigned int cacheSize, const bool clean) : dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean) , cachePtr_(new std::vector(size)) , modifiedPtr_(new std::vector(size, false)) , indexPtr_(new std::map()) , freePtr_(new std::stack) , loadsPtr_(new std::deque()) { struct stat s; if(stat(dirname.c_str(), &s) == 0) { HADRONS_ERROR(Io, "directory '" + dirname + "' already exists") } mkdir(dirname); for (unsigned int i = 0; i < cacheSize_; ++i) { freePtr_->push(i); } } template DiskVectorBase::~DiskVectorBase(void) { if (clean_) { clean(); } } template const T & DiskVectorBase::operator[](const unsigned int i) const { auto &cache = *cachePtr_; auto &index = *indexPtr_; auto &freeInd = *freePtr_; auto &loads = *loadsPtr_; DV_DEBUG_MSG(this, "accessing " << i << " (RO)"); if (i >= size_) { HADRONS_ERROR(Size, "index out of range"); } const_cast(access_)++; if (index.find(i) == index.end()) { // cache miss DV_DEBUG_MSG(this, "cache miss"); fetch(i); } else { DV_DEBUG_MSG(this, "cache hit"); auto pos = std::find(loads.begin(), loads.end(), i); const_cast(hit_)++; loads.erase(pos); loads.push_back(i); } #ifdef DV_DEBUG std::string msg; for (auto &p: loads) { msg += std::to_string(p) + " "; } DV_DEBUG_MSG(this, "in cache: " << msg); #endif return cache[index.at(i)]; } template typename DiskVectorBase::RwAccessHelper DiskVectorBase::operator[](const unsigned int i) { DV_DEBUG_MSG(this, "accessing " << i << " (RW)"); if (i >= size_) { HADRONS_ERROR(Size, "index out of range"); } return RwAccessHelper(*this, i); } template double DiskVectorBase::hitRatio(void) const { return hit_/access_; } template void DiskVectorBase::resetStat(void) { access_ = 0.; hit_ = 0.; } template std::string DiskVectorBase::filename(const unsigned int i) const { return dirname_ + "/elem_" + std::to_string(i); } template void DiskVectorBase::evict(void) const { auto &cache = *cachePtr_; auto &modified = *modifiedPtr_; auto &index = *indexPtr_; auto &freeInd = *freePtr_; auto &loads = *loadsPtr_; if (index.size() >= cacheSize_) { unsigned int i = loads.front(); DV_DEBUG_MSG(this, "evicting " << i); if (modified[index.at(i)]) { DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk"); save(filename(i), cache[index.at(i)]); } freeInd.push(index.at(i)); index.erase(i); loads.pop_front(); } } template void DiskVectorBase::fetch(const unsigned int i) const { auto &cache = *cachePtr_; auto &modified = *modifiedPtr_; auto &index = *indexPtr_; auto &freeInd = *freePtr_; auto &loads = *loadsPtr_; struct stat s; DV_DEBUG_MSG(this, "loading " << i << " from disk"); evict(); if(stat(filename(i).c_str(), &s) != 0) { HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised"); } index[i] = freeInd.top(); freeInd.pop(); load(cache[index.at(i)], filename(i)); loads.push_back(i); modified[index.at(i)] = false; } template void DiskVectorBase::cacheInsert(const unsigned int i, const T &obj) const { auto &cache = *cachePtr_; auto &modified = *modifiedPtr_; auto &index = *indexPtr_; auto &freeInd = *freePtr_; auto &loads = *loadsPtr_; evict(); index[i] = freeInd.top(); freeInd.pop(); cache[index.at(i)] = obj; loads.push_back(i); modified[index.at(i)] = false; #ifdef DV_DEBUG std::string msg; for (auto &p: loads) { msg += std::to_string(p) + " "; } DV_DEBUG_MSG(this, "in cache: " << msg); #endif } #ifdef DV_DEBUG #undef DV_DEBUG_MSG #endif template void DiskVectorBase::clean(void) { auto unlink = [](const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { int rv = remove(fpath); if (rv) { HADRONS_ERROR(Io, "cannot remove '" + std::string(fpath) + "': " + std::string(std::strerror(errno))); } return rv; }; nftw(dirname_.c_str(), unlink, 64, FTW_DEPTH | FTW_PHYS); } END_HADRONS_NAMESPACE #endif // Hadrons_DiskVector_hpp_