1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-11 14:40:46 +01:00

Hadrons: DiskVector save-on-eviction and faster CRC32 for Eigen matrices

This commit is contained in:
Antonin Portelli 2018-10-18 17:48:25 +01:00
parent f709329d96
commit f333f3e575
2 changed files with 63 additions and 43 deletions

View File

@ -34,10 +34,11 @@ extern "C" {
class GridChecksum class GridChecksum
{ {
public: public:
static inline uint32_t crc32(void *data,size_t bytes) static inline uint32_t crc32(const void *data,size_t bytes)
{ {
return ::crc32(0L,(unsigned char *)data,bytes); return ::crc32(0L,(unsigned char *)data,bytes);
} }
template <typename T> template <typename T>
static inline std::string sha256_string(const std::vector<T> &hash) static inline std::string sha256_string(const std::vector<T> &hash)
{ {

View File

@ -59,14 +59,18 @@ public:
: master_(master), cmaster_(master), i_(i) {} : master_(master), cmaster_(master), i_(i) {}
// operator=: somebody is trying to store a vector element // operator=: somebody is trying to store a vector element
// write to disk and cache // write to cache and tag as modified
T &operator=(const T &obj) const T &operator=(const T &obj) const
{ {
auto &cache = *master_.cachePtr_;
auto &modified = *master_.modifiedPtr_;
auto &index = *master_.indexPtr_;
DV_DEBUG_MSG(&master_, "writing to " << i_); DV_DEBUG_MSG(&master_, "writing to " << i_);
master_.cacheInsert(i_, obj); master_.cacheInsert(i_, obj);
master_.save(master_.filename(i_), obj); modified[index.at(i_)] = true;
return master_.cachePtr_->at(i_); return cache[index.at(i_)];
} }
// implicit cast to const object reference and redirection // implicit cast to const object reference and redirection
@ -98,13 +102,14 @@ private:
void cacheInsert(const unsigned int i, const T &obj) const; void cacheInsert(const unsigned int i, const T &obj) const;
void clean(void); void clean(void);
private: private:
std::string dirname_; std::string dirname_;
unsigned int size_, cacheSize_; unsigned int size_, cacheSize_;
double access_{0.}, hit_{0.}; double access_{0.}, hit_{0.};
bool clean_; bool clean_;
// using pointers to allow modifications when class is const // using pointers to allow modifications when class is const
// semantic: const means data unmodified, but cache modification allowed // semantic: const means data unmodified, but cache modification allowed
std::unique_ptr<std::vector<T>> cachePtr_; std::unique_ptr<std::vector<T>> cachePtr_;
std::unique_ptr<std::vector<bool>> modifiedPtr_;
std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_; std::unique_ptr<std::map<unsigned int, unsigned int>> indexPtr_;
std::unique_ptr<std::stack<unsigned int>> freePtr_; std::unique_ptr<std::stack<unsigned int>> freePtr_;
std::unique_ptr<std::deque<unsigned int>> loadsPtr_; std::unique_ptr<std::deque<unsigned int>> loadsPtr_;
@ -149,24 +154,24 @@ public:
private: private:
virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const virtual void load(EigenDiskVectorMat<T> &obj, const std::string filename) const
{ {
std::ifstream f(filename, std::ios::binary); std::ifstream f(filename, std::ios::binary);
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); uint32_t crc, check;
Eigen::Index nRow, nCol; Eigen::Index nRow, nCol;
size_t matSize; size_t matSize;
double t; double t;
f.read(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char)); f.read(reinterpret_cast<char *>(&crc), sizeof(crc));
f.read(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index)); f.read(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.read(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index)); f.read(reinterpret_cast<char *>(&nCol), sizeof(nCol));
obj.resize(nRow, nCol); obj.resize(nRow, nCol);
matSize = nRow*nCol*sizeof(T); matSize = nRow*nCol*sizeof(T);
t = -usecond(); t = -usecond();
f.read(reinterpret_cast<char *>(obj.data()), matSize); f.read(reinterpret_cast<char *>(obj.data()), matSize);
t += usecond(); t += usecond();
DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s"); DV_DEBUG_MSG(this, "Eigen read " << matSize/t*1.0e6/1024/1024 << " MB/s");
auto check = GridChecksum::sha256(obj.data(), matSize); check = GridChecksum::crc32(obj.data(), matSize);
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(check)); DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << check << std::dec);
if (hash != check) if (crc != check)
{ {
HADRONS_ERROR(Io, "checksum failed") HADRONS_ERROR(Io, "checksum failed")
} }
@ -174,24 +179,24 @@ private:
virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const virtual void save(const std::string filename, const EigenDiskVectorMat<T> &obj) const
{ {
std::ofstream f(filename, std::ios::binary); std::ofstream f(filename, std::ios::binary);
std::vector<unsigned char> hash(SHA256_DIGEST_LENGTH); uint32_t crc;
Eigen::Index nRow, nCol; Eigen::Index nRow, nCol;
size_t matSize; size_t matSize;
double t; double t;
nRow = obj.rows(); nRow = obj.rows();
nCol = obj.cols(); nCol = obj.cols();
matSize = nRow*nCol*sizeof(T); matSize = nRow*nCol*sizeof(T);
hash = GridChecksum::sha256(obj.data(), matSize); crc = GridChecksum::crc32(obj.data(), matSize);
DV_DEBUG_MSG(this, "Eigen sha256 " << GridChecksum::sha256_string(hash)); f.write(reinterpret_cast<char *>(&crc), sizeof(crc));
f.write(reinterpret_cast<char *>(hash.data()), hash.size()*sizeof(unsigned char)); f.write(reinterpret_cast<char *>(&nRow), sizeof(nRow));
f.write(reinterpret_cast<char *>(&nRow), sizeof(Eigen::Index)); f.write(reinterpret_cast<char *>(&nCol), sizeof(nCol));
f.write(reinterpret_cast<char *>(&nCol), sizeof(Eigen::Index));
t = -usecond(); t = -usecond();
f.write(reinterpret_cast<const char *>(obj.data()), matSize); f.write(reinterpret_cast<const char *>(obj.data()), matSize);
t += usecond(); t += usecond();
DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s"); DV_DEBUG_MSG(this, "Eigen write " << matSize/t*1.0e6/1024/1024 << " MB/s");
DV_DEBUG_MSG(this, "Eigen crc32 " << std::hex << crc << std::dec);
} }
}; };
@ -205,6 +210,7 @@ DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
const bool clean) const bool clean)
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean) : dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
, cachePtr_(new std::vector<T>(size)) , cachePtr_(new std::vector<T>(size))
, modifiedPtr_(new std::vector<bool>(size, false))
, indexPtr_(new std::map<unsigned int, unsigned int>()) , indexPtr_(new std::map<unsigned int, unsigned int>())
, freePtr_(new std::stack<unsigned int>) , freePtr_(new std::stack<unsigned int>)
, loadsPtr_(new std::deque<unsigned int>()) , loadsPtr_(new std::deque<unsigned int>())
@ -311,15 +317,24 @@ std::string DiskVectorBase<T>::filename(const unsigned int i) const
template <typename T> template <typename T>
void DiskVectorBase<T>::evict(void) const void DiskVectorBase<T>::evict(void) const
{ {
auto &index = *indexPtr_; auto &cache = *cachePtr_;
auto &freeInd = *freePtr_; auto &modified = *modifiedPtr_;
auto &loads = *loadsPtr_; auto &index = *indexPtr_;
auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_;
if (index.size() >= cacheSize_) if (index.size() >= cacheSize_)
{ {
DV_DEBUG_MSG(this, "evicting " << loads.front()); unsigned int i = loads.front();
freeInd.push(index.at(loads.front()));
index.erase(loads.front()); DV_DEBUG_MSG(this, "evicting " << i);
if (modified[index.at(i)])
{
DV_DEBUG_MSG(this, "element " << i << " modified, saving to disk");
save(filename(i), cache[index.at(i)]);
}
freeInd.push(index.at(i));
index.erase(i);
loads.pop_front(); loads.pop_front();
} }
} }
@ -327,10 +342,11 @@ void DiskVectorBase<T>::evict(void) const
template <typename T> template <typename T>
void DiskVectorBase<T>::fetch(const unsigned int i) const void DiskVectorBase<T>::fetch(const unsigned int i) const
{ {
auto &cache = *cachePtr_; auto &cache = *cachePtr_;
auto &index = *indexPtr_; auto &modified = *modifiedPtr_;
auto &freeInd = *freePtr_; auto &index = *indexPtr_;
auto &loads = *loadsPtr_; auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_;
struct stat s; struct stat s;
@ -346,21 +362,24 @@ void DiskVectorBase<T>::fetch(const unsigned int i) const
freeInd.pop(); freeInd.pop();
load(cache[index.at(i)], filename(i)); load(cache[index.at(i)], filename(i));
loads.push_back(i); loads.push_back(i);
modified[index.at(i)] = false;
} }
template <typename T> template <typename T>
void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
{ {
auto &cache = *cachePtr_; auto &cache = *cachePtr_;
auto &index = *indexPtr_; auto &modified = *modifiedPtr_;
auto &freeInd = *freePtr_; auto &index = *indexPtr_;
auto &loads = *loadsPtr_; auto &freeInd = *freePtr_;
auto &loads = *loadsPtr_;
evict(); evict();
index[i] = freeInd.top(); index[i] = freeInd.top();
freeInd.pop(); freeInd.pop();
cache[index.at(i)] = obj; cache[index.at(i)] = obj;
loads.push_back(i); loads.push_back(i);
modified[index.at(i)] = false;
#ifdef DV_DEBUG #ifdef DV_DEBUG
std::string msg; std::string msg;