1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Hadrons: disk-based container

This commit is contained in:
Antonin Portelli 2018-09-07 20:04:54 +01:00
parent 2bf3be5fae
commit 12c7c493bf
4 changed files with 353 additions and 1 deletions

293
Hadrons/DiskVector.hpp Normal file
View File

@ -0,0 +1,293 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: Hadrons/DiskVector.hpp
Copyright (C) 2015-2018
Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_DiskVector_hpp_
#define Hadrons_DiskVector_hpp_
#include <Hadrons/Global.hpp>
#include <queue>
#include <sys/stat.h>
#include <ftw.h>
#include <unistd.h>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Abstract base class *
******************************************************************************/
// Vector-like container whose elements are stored as files in a directory,
// with an in-memory cache of recently stored/loaded elements.
// Derived classes provide the actual file I/O through load() and save().
template <typename T>
class DiskVectorBase
{
public:
    using ObjectType = T;
    // Proxy returned by the non-const operator[]: assigning to it stores an
    // element, converting it to `const T &` reads one.
    class RwAccessHelper
    {
    public:
        RwAccessHelper(DiskVectorBase &master, const unsigned int i)
        : writable_(master), readable_(master), index_(i) {}
        // store request: put the object in the cache, write it to disk, and
        // hand back a reference to the cached copy
        T &operator=(const T &obj) const
        {
            writable_.cacheInsert(index_, obj);
            writable_.save(writable_.filename(index_), obj);

            return writable_.cachePtr_->at(index_);
        }
        // read request: forwarded to the const operator[] of the container
        operator const T&() const
        {
            return readable_[index_];
        }
    private:
        DiskVectorBase       &writable_;
        const DiskVectorBase &readable_;
        const unsigned int   index_;
    };
public:
    // dirname: directory holding the element files; size: number of
    // addressable elements; cacheSize: cache capacity used by evict();
    // clean: whether the destructor calls clean()
    DiskVectorBase(const std::string dirname, const unsigned int size = 0,
                   const unsigned int cacheSize = 1, const bool clean = true);
    virtual ~DiskVectorBase(void);
    // read-only access
    const T & operator[](const unsigned int i) const;
    // read/write access through the proxy above
    RwAccessHelper operator[](const unsigned int i);
private:
    // I/O hooks implemented by derived classes
    virtual void load(T &obj, const std::string filename) const = 0;
    virtual void save(const std::string filename, const T &obj) const = 0;
    // path of the file backing element i
    virtual std::string filename(const unsigned int i) const;
    // cache management
    void evict(void) const;
    void fetch(const unsigned int i) const;
    void cacheInsert(const unsigned int i, const T &obj) const;
    // remove the directory tree
    void clean(void);
private:
    std::string  dirname_;
    unsigned int size_, cacheSize_;
    bool         clean_;
    // The cache lives behind pointers so that const member functions can
    // still update it: const here means "stored data unchanged", while the
    // cache content is allowed to vary.
    std::unique_ptr<std::map<unsigned int, T>> cachePtr_;
    std::unique_ptr<std::queue<unsigned int>>  loadsPtr_;
};
/******************************************************************************
* Specialisation for serialisable classes *
******************************************************************************/
// Disk vector whose elements are read with a `Reader` and written with a
// `Writer`; each element is stored under the name basename(filename) inside
// its own file.
template <typename T, typename Reader, typename Writer>
class SerializableDiskVector: public DiskVectorBase<T>
{
public:
    // reuse the base class constructors
    using DiskVectorBase<T>::DiskVectorBase;
private:
    // read `obj` back from `filename`
    void load(T &obj, const std::string filename) const override
    {
        Reader input(filename);

        read(input, basename(filename), obj);
    }
    // write `obj` to `filename`
    void save(const std::string filename, const T &obj) const override
    {
        Writer output(filename);

        write(output, basename(filename), obj);
    }
};
/******************************************************************************
* DiskVectorBase implementation *
******************************************************************************/
// Debug trace helper used by the member functions below. With DV_DEBUG
// defined it logs `stream` through LOG(Debug), tagged with the address of the
// calling object; otherwise it expands to nothing (its argument is not
// evaluated), so the calls compile in both configurations.
#ifdef DV_DEBUG
#define DV_DEBUG_MSG(stream) LOG(Debug) << "diskvector " << this << ": " << stream << std::endl
#else
#define DV_DEBUG_MSG(stream)
#endif
// Set up an empty cache and create the directory that will hold the element
// files. Raises an Io error if something already exists at `dirname`.
template <typename T>
DiskVectorBase<T>::DiskVectorBase(const std::string dirname,
                                  const unsigned int size,
                                  const unsigned int cacheSize,
                                  const bool clean)
: dirname_(dirname), size_(size), cacheSize_(cacheSize), clean_(clean)
, cachePtr_(new std::map<unsigned int, T>())
, loadsPtr_(new std::queue<unsigned int>())
{
    struct stat info;
    const bool  alreadyThere = (stat(dirname.c_str(), &info) == 0);

    // refuse to reuse an existing path
    if (alreadyThere)
    {
        HADRONS_ERROR(Io, "directory '" + dirname + "' already exists");
    }
    mkdir(dirname);
}
// Remove the directory tree on destruction, unless the vector was built with
// clean = false.
template <typename T>
DiskVectorBase<T>::~DiskVectorBase(void)
{
    if (!clean_)
    {
        return;
    }
    clean();
}
// Read-only element access: return the cached copy of element i, loading it
// from disk first on a cache miss. Raises a Size error when i >= size.
template <typename T>
const T & DiskVectorBase<T>::operator[](const unsigned int i) const
{
    auto &cache = *cachePtr_;

    DV_DEBUG_MSG("accessing " << i << " (RO)");
    if (i >= size_)
    {
        HADRONS_ERROR(Size, "index out of range");
    }

    const bool cached = (cache.find(i) != cache.end());

    if (cached)
    {
        DV_DEBUG_MSG("cache hit");
    }
    else
    {
        // not in memory, bring it in from disk
        DV_DEBUG_MSG("cache miss");
        fetch(i);
    }
#ifdef DV_DEBUG
    // list the indices currently held in the cache
    std::string msg;

    for (auto &entry: cache)
    {
        msg += std::to_string(entry.first) + " ";
    }
    DV_DEBUG_MSG("in cache: " << msg);
#endif

    return cache.at(i);
}
// Read/write element access: return a proxy bound to element i. Nothing is
// loaded or stored until the proxy is assigned to or converted.
// Raises a Size error when i >= size.
template <typename T>
typename DiskVectorBase<T>::RwAccessHelper DiskVectorBase<T>::operator[](const unsigned int i)
{
    DV_DEBUG_MSG("accessing " << i << " (RW)");
    if (size_ <= i)
    {
        HADRONS_ERROR(Size, "index out of range");
    }

    return RwAccessHelper(*this, i);
}
// Path of the file backing element i: "<dirname>/elem_<i>".
template <typename T>
std::string DiskVectorBase<T>::filename(const unsigned int i) const
{
    std::string path = dirname_;

    path += "/elem_";
    path += std::to_string(i);

    return path;
}
// Make room for one new element: while the cache holds cacheSize_ elements or
// more, drop the element whose index is at the front of the load queue.
//
// The queue is checked for emptiness before front() is used (calling front()
// on an empty queue is undefined, which happened with cacheSize_ == 0), and
// the loop keeps going when the front index is no longer cached so that the
// cache is guaranteed to shrink whenever the queue still has entries.
template <typename T>
void DiskVectorBase<T>::evict(void) const
{
    auto &cache = *cachePtr_;
    auto &loads = *loadsPtr_;

    while (!loads.empty() && (cache.size() >= cacheSize_))
    {
        DV_DEBUG_MSG("evicting " << loads.front());
        cache.erase(loads.front());
        loads.pop();
    }
}
// Load element i from its file into the cache and register it in the load
// queue. Raises an Io error if the file does not exist, i.e. the element was
// never stored.
//
// The existence check is done before evicting, so a failed fetch does not
// throw away a valid cached element. If load() throws, the half-built cache
// entry is removed before the exception propagates; otherwise it would stay
// in the cache without being tracked by the load queue and be served as a
// "hit" on the next access.
template <typename T>
void DiskVectorBase<T>::fetch(const unsigned int i) const
{
    auto              &cache = *cachePtr_;
    auto              &loads = *loadsPtr_;
    const std::string file   = filename(i);
    struct stat       s;

    DV_DEBUG_MSG("loading " << i << " from disk");
    if(stat(file.c_str(), &s) != 0)
    {
        HADRONS_ERROR(Io, "disk vector element " + std::to_string(i) + " uninitialised");
    }
    evict();
    try
    {
        load(cache[i], file);
    }
    catch (...)
    {
        cache.erase(i);
        throw;
    }
    loads.push(i);
}
// Put a copy of `obj` in the cache as element i.
//
// If i is already cached its value is overwritten in place: nothing is
// evicted and the load queue is left untouched (the element keeps its current
// position in the eviction order). Pushing the index a second time would
// leave duplicate entries in the queue, and later evictions popping those
// stale entries would free no space, letting the cache grow past its
// capacity. Only a genuinely new index evicts and is queued.
template <typename T>
void DiskVectorBase<T>::cacheInsert(const unsigned int i, const T &obj) const
{
    auto &cache = *cachePtr_;
    auto &loads = *loadsPtr_;
    auto it     = cache.find(i);

    if (it != cache.end())
    {
        // cache hit: refresh the stored value only
        it->second = obj;
    }
    else
    {
        // cache miss: make room, store, and register for later eviction
        evict();
        cache[i] = obj;
        loads.push(i);
    }
#ifdef DV_DEBUG
    // list the indices currently held in the cache
    std::string msg;

    for (auto &p: cache)
    {
        msg += std::to_string(p.first) + " ";
    }
    DV_DEBUG_MSG("in cache: " << msg);
#endif
}
// DV_DEBUG_MSG is local to this header: drop it here so it does not leak into
// including files. #undef on a name that is not a macro is a no-op, so no
// guard is needed.
#undef DV_DEBUG_MSG
// Delete the directory of the vector together with everything it contains.
template <typename T>
void DiskVectorBase<T>::clean(void)
{
    // nftw() callback: remove one entry, raising an Io error if that fails.
    // NOTE(review): the error is raised from inside the nftw() traversal, and
    // clean() is called by the destructor -- confirm this is the intended
    // failure mode.
    auto removeEntry = [](const char *path, const struct stat *,
                          int, struct FTW *)
    {
        const int status = remove(path);

        if (status != 0)
        {
            HADRONS_ERROR(Io, "cannot remove '" + std::string(path) + "': "
                          + std::string(std::strerror(errno)));
        }

        return status;
    };

    // FTW_DEPTH: visit the content of a directory before the directory itself
    // FTW_PHYS : do not follow symbolic links
    // 64       : maximum number of file descriptors nftw() may keep open
    nftw(dirname_.c_str(), removeEntry, 64, FTW_DEPTH | FTW_PHYS);
}
END_HADRONS_NAMESPACE
#endif // Hadrons_DiskVector_hpp_

View File

@ -18,6 +18,7 @@ nobase_libHadrons_a_HEADERS = \
A2AMatrix.hpp \
Application.hpp \
DilutedNoise.hpp \
DiskVector.hpp \
EigenPack.hpp \
Environment.hpp \
Exceptions.hpp \

View File

@ -1,3 +1,3 @@
AM_LDFLAGS += -L../../extras/Hadrons
AM_LDFLAGS += -L../../Hadrons
include Make.inc

View File

@ -0,0 +1,58 @@
#define DV_DEBUG
#include <Hadrons/DiskVector.hpp>
using namespace Grid;
using namespace Grid::QCD;
using namespace Grid::Hadrons;
// Enumeration used as a member of the test object below; Enum::red is the
// only value this test uses.
// NOTE(review): presumably declares red = 1, blue = 2, green = 3 with `undef`
// naming the unset state -- confirm against the GRID_SERIALIZABLE_ENUM macro.
GRID_SERIALIZABLE_ENUM(Enum, undef, red, 1, blue, 2, green, 3);
// Element type stored in the disk vector under test: an Enum `e` and a
// SpinColourMatrix `scm`, declared through GRID_SERIALIZABLE_CLASS_MEMBERS.
// NOTE(review): the macro presumably also generates the read/write and
// comparison support that main() relies on -- confirm in Grid's serialisation
// headers.
class Object: Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Object,
Enum, e,
SpinColourMatrix, scm);
};
// I/O backend for the test: HDF5 when HAVE_HDF5 is defined, binary files
// otherwise.
#ifdef HAVE_HDF5
using TestReader = Hdf5Reader;
using TestWriter = Hdf5Writer;
#else
using TestReader = BinaryReader;
using TestWriter = BinaryWriter;
#endif
// Disk vector round-trip test: store five random objects in a 1000-element
// vector with a cache of 2, then read two of them back and compare them with
// what was written. v[2] is no longer cached when it is read (three more
// elements were stored after it), v[13] is the last element stored.
//
// Returns EXIT_SUCCESS only if both comparisons succeed, so that a failing
// round trip is visible to whatever runs the test and not only in the log.
int main(int argc, char *argv[])
{
    Grid_init(&argc, &argv);

    GridSerialRNG rng;
    Object        obj, v2w, v2r, v13w, v13r;

    SerializableDiskVector<Object, TestReader, TestWriter> v("diskvector_test", 1000, 2);

    // fill a few elements, keeping a copy of the ones checked below
    obj.e = Enum::red;
    random(rng, obj.scm);
    v[32] = obj;
    random(rng, obj.scm);
    v[2] = obj;
    v2w  = obj;
    random(rng, obj.scm);
    v[6] = obj;
    random(rng, obj.scm);
    v[12] = obj;
    random(rng, obj.scm);
    v[13] = obj;
    v13w  = obj;

    // read back and compare
    v2r = v[2];
    const bool v2Ok = (v2r == v2w);
    LOG(Message) << "v[2] correct? "
                 << (v2Ok ? "yes" : "no" ) << std::endl;
    v13r = v[13];
    const bool v13Ok = (v13r == v13w);
    LOG(Message) << "v[13] correct? "
                 << (v13Ok ? "yes" : "no" ) << std::endl;

    Grid_finalize();

    return (v2Ok && v13Ok) ? EXIT_SUCCESS : EXIT_FAILURE;
}