From 936eaac8e1c1d802bdac320933e11ad12437303b Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 8 Oct 2018 19:00:50 +0100 Subject: [PATCH 1/3] function to get the sha256 string --- Grid/lattice/Lattice_rng.h | 6 +----- Grid/util/Sha.h | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index c6b9e14e..5348538c 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -392,14 +392,10 @@ namespace Grid { void SeedUniqueString(const std::string &s){ std::vector seeds; - std::stringstream sha; seeds = GridChecksum::sha256_seeds(s); - for(int i=0;i &seeds){ diff --git a/Grid/util/Sha.h b/Grid/util/Sha.h index 6cfbe0bd..b0a8cc10 100644 --- a/Grid/util/Sha.h +++ b/Grid/util/Sha.h @@ -38,7 +38,21 @@ public: { return ::crc32(0L,(unsigned char *)data,bytes); } - static inline std::vector sha256(void *data,size_t bytes) + template + static inline std::string sha256_string(const std::vector &hash) + { + std::stringstream sha; + std::string s; + + for(unsigned int i = 0; i < hash.size(); i++) + { + sha << std::hex << static_cast(hash[i]); + } + s = sha.str(); + + return s; + } + static inline std::vector sha256(const void *data,size_t bytes) { std::vector hash(SHA256_DIGEST_LENGTH); SHA256_CTX sha256; From efc0c65056135be0cadd13a24086c23ab44317dc Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 8 Oct 2018 19:02:00 +0100 Subject: [PATCH 2/3] Hadrons: DiskVector Eigen specialisation with binary I/O and sha256 correctness check --- Hadrons/DiskVector.hpp | 115 ++++++++++++++++++++++++++----- tests/hadrons/Test_diskvector.cc | 16 +++++ 2 files changed, 113 insertions(+), 18 deletions(-) diff --git a/Hadrons/DiskVector.hpp b/Hadrons/DiskVector.hpp index 231854f3..94c3e597 100644 --- a/Hadrons/DiskVector.hpp +++ b/Hadrons/DiskVector.hpp @@ -34,6 +34,12 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include +#ifdef DV_DEBUG +#define DV_DEBUG_MSG(dv, stream) LOG(Debug) << "diskvector " << (dv) << ": " << stream << std::endl +#else +#define DV_DEBUG_MSG(dv, stream) +#endif + BEGIN_HADRONS_NAMESPACE /****************************************************************************** @@ -56,9 +62,7 @@ public: // write to disk and cache T &operator=(const T &obj) const { -#ifdef DV_DEBUG - LOG(Debug) << "diskvector " << &master_ << ": writing to " << i_ << std::endl; -#endif + DV_DEBUG_MSG(&master_, "writing to " << i_); master_.cacheInsert(i_, obj); master_.save(master_.filename(i_), obj); @@ -82,6 +86,8 @@ public: virtual ~DiskVectorBase(void); const T & operator[](const unsigned int i) const; RwAccessHelper operator[](const unsigned int i); + double hitRatio(void) const; + void resetStat(void); private: virtual void load(T &obj, const std::string filename) const = 0; virtual void save(const std::string filename, const T &obj) const = 0; @@ -93,6 +99,7 @@ private: private: std::string dirname_; unsigned int size_, cacheSize_; + double access_{0.}, hit_{0.}; bool clean_; // using pointers to allow modifications when class is const // semantic: const means data unmodified, but cache modification allowed @@ -115,6 +122,7 @@ private: read(reader, basename(filename), obj); } + virtual void save(const std::string filename, const T &obj) const { Writer writer(filename); @@ -123,13 +131,70 @@ private: } }; +/****************************************************************************** + * Specialisation for Eigen matrices * + ******************************************************************************/ +template +using EigenDiskVectorMat = Eigen::Matrix; + +template +class EigenDiskVector: public DiskVectorBase> +{ +public: + using DiskVectorBase>::DiskVectorBase; + typedef EigenDiskVectorMat Matrix; +private: + virtual void load(EigenDiskVectorMat &obj, const std::string filename) const + { + std::ifstream f(filename, std::ios::binary); + std::vector hash(SHA256_DIGEST_LENGTH); + Eigen::Index nRow, nCol; + size_t matSize; + double t; + + f.read(reinterpret_cast(hash.data()), hash.size()*sizeof(unsigned char)); + f.read(reinterpret_cast(&nRow), sizeof(Eigen::Index)); + f.read(reinterpret_cast(&nCol), sizeof(Eigen::Index)); + obj.resize(nRow, nCol); + matSize = nRow*nCol*sizeof(T); + t = -usecond(); + f.read(reinterpret_cast(obj.data()), matSize); + t += usecond(); + DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s"); + auto check = GridChecksum::sha256(obj.data(), matSize); + DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check)); + if (hash != check) + { + HADRONS_ERROR(Io, "checksum failed") + } + } + + virtual void save(const std::string filename, const EigenDiskVectorMat &obj) const + { + std::ofstream f(filename, std::ios::binary); + std::vector hash(SHA256_DIGEST_LENGTH); + Eigen::Index nRow, nCol; + size_t matSize; + double t; + + nRow = obj.rows(); + nCol = obj.cols(); + matSize = nRow*nCol*sizeof(T); + hash = GridChecksum::sha256(obj.data(), matSize); + DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash)); + f.write(reinterpret_cast(hash.data()), hash.size()*sizeof(unsigned char)); + f.write(reinterpret_cast(&nRow), sizeof(Eigen::Index)); + f.write(reinterpret_cast(&nCol), sizeof(Eigen::Index)); + t = -usecond(); + f.write(reinterpret_cast(obj.data()), matSize); + t += usecond(); + DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s"); + } +}; + /****************************************************************************** * DiskVectorBase implementation * ******************************************************************************/ -#ifdef DV_DEBUG -#define DV_DEBUG_MSG(stream) LOG(Debug) << "diskvector " << this << ": " << stream << std::endl -#endif - template DiskVectorBase::DiskVectorBase(const std::string dirname, const unsigned int size, @@ -160,28 +225,29 @@ DiskVectorBase::~DiskVectorBase(void) template const T & DiskVectorBase::operator[](const unsigned int i) const { - auto &cache = *cachePtr_; - auto &loads = *loadsPtr_; + auto &cache = *cachePtr_; + auto &loads = *loadsPtr_; - DV_DEBUG_MSG("accessing " << i << " (RO)"); + DV_DEBUG_MSG(this, "accessing " << i << " (RO)"); if (i >= size_) { HADRONS_ERROR(Size, "index out of range"); } - + const_cast(access_)++; if (cache.find(i) == cache.end()) { // cache miss - DV_DEBUG_MSG("cache miss"); + DV_DEBUG_MSG(this, "cache miss"); fetch(i); } else { - DV_DEBUG_MSG("cache hit"); + DV_DEBUG_MSG(this, "cache hit"); auto pos = std::find(loads.begin(), loads.end(), i); + const_cast(hit_)++; loads.erase(pos); loads.push_back(i); } @@ -193,7 +259,7 @@ const T & DiskVectorBase::operator[](const unsigned int i) const { msg += std::to_string(p) + " "; } - DV_DEBUG_MSG("in cache: " << msg); + DV_DEBUG_MSG(this, "in cache: " << msg); #endif return cache.at(i); @@ -202,7 +268,7 @@ const T & DiskVectorBase::operator[](const unsigned int i) const template typename DiskVectorBase::RwAccessHelper DiskVectorBase::operator[](const unsigned int i) { - DV_DEBUG_MSG("accessing " << i << " (RW)"); + DV_DEBUG_MSG(this, "accessing " << i << " (RW)"); if (i >= size_) { @@ -212,6 +278,19 @@ typename DiskVectorBase::RwAccessHelper DiskVectorBase::operator[](const u return RwAccessHelper(*this, i); } +template +double DiskVectorBase::hitRatio(void) const +{ + return hit_/access_; +} + +template +void DiskVectorBase::resetStat(void) +{ + access_ = 0.; + hit_ = 0.; +} + template std::string DiskVectorBase::filename(const unsigned int i) const { @@ -226,7 +305,7 @@ void DiskVectorBase::evict(void) const if (cache.size() >= cacheSize_) { - DV_DEBUG_MSG("evicting " << loads.front()); + DV_DEBUG_MSG(this, "evicting " << loads.front()); cache.erase(loads.front()); loads.pop_front(); } @@ -239,7 +318,7 @@ void DiskVectorBase::fetch(const unsigned int i) const auto &loads = *loadsPtr_; struct stat s; - DV_DEBUG_MSG("loading " << i << " from disk"); + DV_DEBUG_MSG(this, "loading " << i << " from disk"); evict(); if(stat(filename(i).c_str(), &s) != 0) @@ -267,7 +346,7 @@ void DiskVectorBase::cacheInsert(const unsigned int i, const T &obj) const { msg += std::to_string(p) + " "; } - DV_DEBUG_MSG("in cache: " << msg); + DV_DEBUG_MSG(this, "in cache: " << msg); #endif } diff --git a/tests/hadrons/Test_diskvector.cc b/tests/hadrons/Test_diskvector.cc index 363ae2ce..10bc4db1 100644 --- a/tests/hadrons/Test_diskvector.cc +++ b/tests/hadrons/Test_diskvector.cc @@ -91,6 +91,22 @@ int main(int argc, char *argv[]) v13r = v[13]; LOG(Message) << "v[13] correct? " << ((v13r == v13w) ? "yes" : "no" ) << std::endl; + LOG(Message) << "hit ratio " << v.hitRatio() << std::endl; + + EigenDiskVector w("eigendiskvector_test", 1000, 4); + EigenDiskVector::Matrix m,n; + + w[2] = EigenDiskVectorMat::Random(2000, 2000); + m = w[2]; + w[3] = EigenDiskVectorMat::Random(2000, 2000); + w[4] = EigenDiskVectorMat::Random(2000, 2000); + w[5] = EigenDiskVectorMat::Random(2000, 2000); + w[6] = EigenDiskVectorMat::Random(2000, 2000); + w[7] = EigenDiskVectorMat::Random(2000, 2000); + n = w[2]; + LOG(Message) << "w[2] correct? " + << ((m == n) ? "yes" : "no" ) << std::endl; + LOG(Message) << "hit ratio " << w.hitRatio() << std::endl; Grid_finalize(); From 3023287fd98cebc086d5f9bcd82e316394200722 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 16 Oct 2018 14:44:14 +0100 Subject: [PATCH 3/3] Hadrons: 3-index RO access to Eigen disk vector --- Hadrons/DiskVector.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Hadrons/DiskVector.hpp b/Hadrons/DiskVector.hpp index 94c3e597..0b4bffe1 100644 --- a/Hadrons/DiskVector.hpp +++ b/Hadrons/DiskVector.hpp @@ -143,6 +143,12 @@ class EigenDiskVector: public DiskVectorBase> public: using DiskVectorBase>::DiskVectorBase; typedef EigenDiskVectorMat Matrix; +public: + T operator()(const unsigned int i, const Eigen::Index j, + const Eigen::Index k) const + { + return (*this)[i](j, k); + } private: virtual void load(EigenDiskVectorMat &obj, const std::string filename) const {