mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
354 lines
12 KiB
C++
354 lines
12 KiB
C++
/*************************************************************************************
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./Grid/serialisation/VectorUtils.h
|
|
|
|
Copyright (C) 2015
|
|
|
|
Author: Peter Boyle <paboyle@ed.ac.uk>
|
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
|
Author: Michael Marshall <michael.marshall@ed.ac.uk>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
|
|
#ifndef GRID_SERIALISATION_HDF5_H
|
|
#define GRID_SERIALISATION_HDF5_H
|
|
|
|
#include <stack>
|
|
#include <string>
|
|
#include <list>
|
|
#include <vector>
|
|
#include <H5Cpp.h>
|
|
#include <Grid/tensors/Tensors.h>
|
|
#include "Hdf5Type.h"
|
|
|
|
// default thresold above which datasets are used instead of attributes
|
|
#ifndef HDF5_DEF_DATASET_THRES
|
|
#define HDF5_DEF_DATASET_THRES 6u
|
|
#endif
|
|
|
|
// name guard for Grid metadata
|
|
#define HDF5_GRID_GUARD "_Grid_"
|
|
|
|
namespace Grid
|
|
{
|
|
class Hdf5Writer: public Writer<Hdf5Writer>
|
|
{
|
|
public:
|
|
Hdf5Writer(const std::string &fileName);
|
|
virtual ~Hdf5Writer(void) = default;
|
|
void push(const std::string &s);
|
|
void pop(void);
|
|
void writeDefault(const std::string &s, const char *x);
|
|
template <typename U>
|
|
void writeDefault(const std::string &s, const U &x);
|
|
template <typename U>
|
|
void writeRagged(const std::string &s, const std::vector<U> &x);
|
|
template <typename U>
|
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
|
writeDefault(const std::string &s, const std::vector<U> &x);
|
|
template <typename U>
|
|
typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
|
|
writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); }
|
|
template <typename U>
|
|
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
|
|
H5NS::Group & getGroup(void);
|
|
private:
|
|
template <typename U>
|
|
void writeSingleAttribute(const U &x, const std::string &name,
|
|
const H5NS::DataType &type);
|
|
private:
|
|
std::string fileName_;
|
|
std::vector<std::string> path_;
|
|
H5NS::H5File file_;
|
|
H5NS::Group group_;
|
|
const unsigned int dataSetThres_{HDF5_DEF_DATASET_THRES};
|
|
};
|
|
|
|
class Hdf5Reader: public Reader<Hdf5Reader>
|
|
{
|
|
public:
|
|
Hdf5Reader(const std::string &fileName, const bool readOnly = true);
|
|
virtual ~Hdf5Reader(void) = default;
|
|
bool push(const std::string &s);
|
|
void pop(void);
|
|
template <typename U>
|
|
void readDefault(const std::string &s, U &output);
|
|
template <typename U>
|
|
void readRagged(const std::string &s, std::vector<U> &x);
|
|
template <typename U>
|
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
|
readDefault(const std::string &s, std::vector<U> &x);
|
|
template <typename U>
|
|
typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
|
|
readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); }
|
|
template <typename U>
|
|
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
|
|
H5NS::Group & getGroup(void);
|
|
private:
|
|
template <typename U>
|
|
void readSingleAttribute(U &x, const std::string &name,
|
|
const H5NS::DataType &type);
|
|
private:
|
|
std::string fileName_;
|
|
std::vector<std::string> path_;
|
|
H5NS::H5File file_;
|
|
H5NS::Group group_;
|
|
unsigned int dataSetThres_;
|
|
};
|
|
|
|
// Writer template implementation ////////////////////////////////////////////
|
|
template <typename U>
|
|
void Hdf5Writer::writeSingleAttribute(const U &x, const std::string &name,
|
|
const H5NS::DataType &type)
|
|
{
|
|
H5NS::Attribute attribute;
|
|
hsize_t attrDim = 1;
|
|
H5NS::DataSpace attrSpace(1, &attrDim);
|
|
|
|
attribute = group_.createAttribute(name, type, attrSpace);
|
|
attribute.write(type, &x);
|
|
}
|
|
|
|
template <typename U>
|
|
void Hdf5Writer::writeDefault(const std::string &s, const U &x)
|
|
{
|
|
writeSingleAttribute(x, s, Hdf5Type<U>::type());
|
|
}
|
|
|
|
template <>
|
|
void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
|
|
|
|
template <typename U>
|
|
void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
|
|
{
|
|
// Hdf5 needs the dimensions as hsize_t
|
|
const int rank = static_cast<int>(Dimensions.size());
|
|
std::vector<hsize_t> dim(rank);
|
|
for(int i = 0; i < rank; i++)
|
|
dim[i] = Dimensions[i];
|
|
// write the entire dataset to file
|
|
H5NS::DataSpace dataSpace(rank, dim.data());
|
|
|
|
if (NumElements > dataSetThres_)
|
|
{
|
|
// Make sure 1) each dimension; and 2) chunk size is < 4GB
|
|
const hsize_t MaxElements = ( sizeof( U ) == 1 ) ? 0xffffffff : 0x100000000 / sizeof( U );
|
|
hsize_t ElementsPerChunk = 1;
|
|
bool bTooBig = false;
|
|
for( int i = rank - 1 ; i != -1 ; i-- ) {
|
|
auto &d = dim[i];
|
|
if( bTooBig )
|
|
d = 1; // Chunk size is already as big as can be - remaining dimensions = 1
|
|
else {
|
|
// If individual dimension too big, reduce by prime factors if possible
|
|
while( d > MaxElements && ( d & 1 ) == 0 )
|
|
d >>= 1;
|
|
const char ErrorMsg[] = " dimension > 4GB and not divisible by 2^n. "
|
|
"Hdf5IO chunk size will be inefficient. NB Serialisation is not intended for large datasets - please consider alternatives.";
|
|
if( d > MaxElements ) {
|
|
std::cout << GridLogWarning << "Individual" << ErrorMsg << std::endl;
|
|
hsize_t quotient = d / MaxElements;
|
|
if( d % MaxElements )
|
|
quotient++;
|
|
d /= quotient;
|
|
}
|
|
// Now make sure overall size is not too big
|
|
hsize_t OverflowCheck = ElementsPerChunk;
|
|
ElementsPerChunk *= d;
|
|
assert( OverflowCheck == ElementsPerChunk / d && "Product of dimensions overflowed hsize_t" );
|
|
// If product of dimensions too big, reduce by prime factors
|
|
while( ElementsPerChunk > MaxElements && ( ElementsPerChunk & 1 ) == 0 ) {
|
|
bTooBig = true;
|
|
d >>= 1;
|
|
ElementsPerChunk >>= 1;
|
|
}
|
|
if( ElementsPerChunk > MaxElements ) {
|
|
std::cout << GridLogWarning << "Product of" << ErrorMsg << std::endl;
|
|
hsize_t quotient = ElementsPerChunk / MaxElements;
|
|
if( ElementsPerChunk % MaxElements )
|
|
quotient++;
|
|
d /= quotient;
|
|
ElementsPerChunk /= quotient;
|
|
}
|
|
}
|
|
}
|
|
H5NS::DataSet dataSet;
|
|
H5NS::DSetCreatPropList plist;
|
|
plist.setChunk(rank, dim.data());
|
|
plist.setFletcher32();
|
|
dataSet = group_.createDataSet(s, Hdf5Type<U>::type(), dataSpace, plist);
|
|
dataSet.write(pDataRowMajor, Hdf5Type<U>::type());
|
|
}
|
|
else
|
|
{
|
|
H5NS::Attribute attribute;
|
|
attribute = group_.createAttribute(s, Hdf5Type<U>::type(), dataSpace);
|
|
attribute.write(Hdf5Type<U>::type(), pDataRowMajor);
|
|
}
|
|
}
|
|
|
|
template <typename U>
|
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
|
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
|
|
{
|
|
if (isRegularShape(x))
|
|
{
|
|
// alias to element type
|
|
using Scalar = typename is_flattenable<std::vector<U>>::type;
|
|
|
|
// flatten the vector and getting dimensions
|
|
Flatten<std::vector<U>> flat(x);
|
|
std::vector<size_t> dim;
|
|
const auto &flatx = flat.getFlatVector();
|
|
for (auto &d: flat.getDim())
|
|
dim.push_back(d);
|
|
writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size());
|
|
}
|
|
else
|
|
{
|
|
writeRagged(s, x);
|
|
}
|
|
}
|
|
|
|
template <typename U>
|
|
void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x)
|
|
{
|
|
push(s);
|
|
writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
|
|
Hdf5Type<uint64_t>::type());
|
|
for (hsize_t i = 0; i < x.size(); ++i)
|
|
{
|
|
write(s + "_" + std::to_string(i), x[i]);
|
|
}
|
|
pop();
|
|
}
|
|
|
|
// Reader template implementation ////////////////////////////////////////////
|
|
template <typename U>
|
|
void Hdf5Reader::readSingleAttribute(U &x, const std::string &name,
|
|
const H5NS::DataType &type)
|
|
{
|
|
H5NS::Attribute attribute;
|
|
|
|
attribute = group_.openAttribute(name);
|
|
attribute.read(type, &x);
|
|
}
|
|
|
|
template <typename U>
|
|
void Hdf5Reader::readDefault(const std::string &s, U &output)
|
|
{
|
|
readSingleAttribute(output, s, Hdf5Type<U>::type());
|
|
}
|
|
|
|
template <>
|
|
void Hdf5Reader::readDefault(const std::string &s, std::string &x);
|
|
|
|
template <typename U>
|
|
void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
|
|
{
|
|
// alias to element type
|
|
using Scalar = typename is_flattenable<std::vector<U>>::type;
|
|
|
|
// read the dimensions
|
|
H5NS::DataSpace dataSpace;
|
|
std::vector<hsize_t> hdim;
|
|
hsize_t size = 1;
|
|
|
|
if (group_.attrExists(s))
|
|
{
|
|
dataSpace = group_.openAttribute(s).getSpace();
|
|
}
|
|
else
|
|
{
|
|
dataSpace = group_.openDataSet(s).getSpace();
|
|
}
|
|
hdim.resize(dataSpace.getSimpleExtentNdims());
|
|
dataSpace.getSimpleExtentDims(hdim.data());
|
|
for (auto &d: hdim)
|
|
{
|
|
dim.push_back(d);
|
|
size *= d;
|
|
}
|
|
|
|
// read the flat vector
|
|
buf.resize(size);
|
|
|
|
if (size > dataSetThres_)
|
|
{
|
|
H5NS::DataSet dataSet;
|
|
|
|
dataSet = group_.openDataSet(s);
|
|
dataSet.read(buf.data(), Hdf5Type<Scalar>::type());
|
|
}
|
|
else
|
|
{
|
|
H5NS::Attribute attribute;
|
|
|
|
attribute = group_.openAttribute(s);
|
|
attribute.read(Hdf5Type<Scalar>::type(), buf.data());
|
|
}
|
|
}
|
|
|
|
template <typename U>
|
|
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
|
|
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
|
|
{
|
|
if (H5Lexists (group_.getId(), s.c_str(), H5P_DEFAULT) > 0
|
|
&& H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0)
|
|
{
|
|
readRagged(s, x);
|
|
}
|
|
else
|
|
{
|
|
// alias to element type
|
|
using Scalar = typename is_flattenable<std::vector<U>>::type;
|
|
|
|
std::vector<size_t> dim;
|
|
std::vector<Scalar> buf;
|
|
readMultiDim( s, buf, dim );
|
|
|
|
// reconstruct the multidimensional vector
|
|
Reconstruct<std::vector<U>> r(buf, dim);
|
|
|
|
x = r.getVector();
|
|
}
|
|
}
|
|
|
|
template <typename U>
|
|
void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x)
|
|
{
|
|
uint64_t size;
|
|
|
|
push(s);
|
|
readSingleAttribute(size, HDF5_GRID_GUARD "vector_size",
|
|
Hdf5Type<uint64_t>::type());
|
|
x.resize(size);
|
|
for (hsize_t i = 0; i < x.size(); ++i)
|
|
{
|
|
read(s + "_" + std::to_string(i), x[i]);
|
|
}
|
|
pop();
|
|
}
|
|
}
|
|
|
|
#endif
|