From 44a2d4854ab94abf67c86fbed8c0682452259d34 Mon Sep 17 00:00:00 2001
From: Michael Marshall
Date: Fri, 22 Feb 2019 15:14:32 +0000
Subject: [PATCH] Ensured Hdf5 chunk size always less than 4GB

---
 Grid/serialisation/Hdf5IO.h          | 43 ++++++++++++++++++++++++++--
 tests/hadrons/Test_hadrons_distil.cc |  5 +++-
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/Grid/serialisation/Hdf5IO.h b/Grid/serialisation/Hdf5IO.h
index c1bb891c..8ec6a8f5 100644
--- a/Grid/serialisation/Hdf5IO.h
+++ b/Grid/serialisation/Hdf5IO.h
@@ -105,11 +105,25 @@ namespace Grid
   template <>
   void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
 
+  // Round n up to the nearest power of two; an exact power of two is returned
+  // unchanged.  The highest set bit of n - 1 is smeared into every lower bit
+  // (e.g. 1000 0010 --> 1111 1111) and adding one gives 1 0000 0000.
+  // The shift doubles until it spans the whole width of hsize_t, so sizes
+  // above 32 bits are rounded correctly.  n == 0 wraps round and yields 0.
+  inline hsize_t alignup(hsize_t n)
+  {
+    n--;                                  // 1000 0011 --> 1000 0010
+    for (unsigned int shift = 1; shift < 8 * sizeof(hsize_t); shift <<= 1)
+      n |= n >> shift;                    // 1000 0010 --> 1111 1111
+    n++;                                  // 1111 1111 --> 1 0000 0000
+    return n;
+  }
+
   template <typename U>
   void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
   {
     // Hdf5 needs the dimensions as hsize_t
-    int rank = static_cast<int>(Dimensions.size());
+    const int rank = static_cast<int>(Dimensions.size());
     std::vector<hsize_t> dim(rank);
     for(int i = 0; i < rank; i++)
       dim[i] = Dimensions[i];
@@ -119,9 +133,34 @@ namespace Grid
     size_t DataSize = NumElements * sizeof(U);
     if (DataSize > dataSetThres_)
     {
+      // HDF5 requires every chunk to be smaller than 4GB.  Working from the
+      // fastest-varying (last) dimension, whole dimensions are kept in the
+      // chunk while they fit, the first one that overflows is shrunk and
+      // every remaining (slower) dimension is set to 1.
+      // MaxElements = most elements whose total size still fits in 32 bits
+      const hsize_t MaxElements = 0xffffffff / sizeof( U );
+      hsize_t ElementsPerChunk = 1;
+      bool bTooBig = false;
+      for( int i = rank - 1; i >= 0; i-- ) {
+        if( bTooBig )
+          // Chunk size is already as big as can be - remaining dimensions = 1
+          dim[i] = 1;
+        else {
+          // Now make sure overall size is not too big
+          ElementsPerChunk *= dim[i];
+          if( ElementsPerChunk >= MaxElements ) {
+            bTooBig = true;
+            // Divide this dimension by ceil(ElementsPerChunk / MaxElements)
+            hsize_t dividend = ElementsPerChunk / MaxElements;
+            hsize_t remainder = ElementsPerChunk % MaxElements;
+            if( remainder )
+              dividend++;
+            dim[i] = dim[i] / dividend;
+          }
+        }
+      }
       H5NS::DataSet           dataSet;
       H5NS::DSetCreatPropList plist;
-
       plist.setChunk(rank, dim.data());
       plist.setFletcher32();
       dataSet = group_.createDataSet(s, Hdf5Type<U>::type(), dataSpace, plist);
diff --git a/tests/hadrons/Test_hadrons_distil.cc b/tests/hadrons/Test_hadrons_distil.cc
index 655be9cf..40d6a4bf 100644
--- a/tests/hadrons/Test_hadrons_distil.cc
+++ b/tests/hadrons/Test_hadrons_distil.cc
@@ -655,7 +655,10 @@ int main(int argc, char *argv[])
   << ", sizeof(size_t) = " << sizeof(size_t)
   << ", sizeof(std::size_t) = " << sizeof(std::size_t)
   << ", sizeof(std::streamsize) = " << sizeof(std::streamsize)
-  << ", sizeof(Eigen::Index) = " << sizeof(Eigen::Index) << std::endl;
+  << ", sizeof(Eigen::Index) = " << sizeof(Eigen::Index)
+  << ", sizeof(hsize_t) = " << sizeof(hsize_t)
+  << ", sizeof(unsigned long long) = " << sizeof(unsigned long long)
+  << std::endl;
   if( DebugEigenTest() ) return 0;
   if(DebugGridTensorTest()) return 0;
 #endif