Make the HDF5 chunking layout reasonably efficient: look for small prime factors of each dimension, falling back to an approximate size if needed.

2025-12-22 05:34:30 +00:00 · 2019-02-25 11:07:29 +00:00
parent 44a2d4854a
commit 3c9f2d4106
2 changed files with 63 additions and 34 deletions
--- a/Grid/serialisation/Hdf5IO.h
+++ b/Grid/serialisation/Hdf5IO.h
@@ -3,6 +3,7 @@
 #include <stack>
 #include <string>
 #include <list>
 #include <vector>
 #include <H5Cpp.h>
 #include <Grid/tensors/Tensors.h>
@@ -105,18 +106,17 @@ namespace Grid
  template <>
  void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
-  static hsize_t alignup(hsize_t n)
+  class SortNode {
-  {
+  public:
-    n--;           // 1000 0011 --> 1000 0010
+    int index;
-    n |= n >> 1;   // 1000 0010 | 0100 0001 = 1100 0011
+    hsize_t dimsize;
-    n |= n >> 2;   // 1100 0011 | 0011 0000 = 1111 0011
+    //bool operator<(const SortNode &r) { return dimsize < r.dimsize || (dimsize == r.dimsize && index < r.index); }
-    n |= n >> 4;   // 1111 0011 | 0000 1111 = 1111 1111
+    //SortNode() = default;
-    n |= n >> 8;   // ... (At this point all bits are 1, so further bitwise-or
+    SortNode(int Index, hsize_t DimSize) : index{Index}, dimsize{DimSize} {}
    n |= n >> 16;  //      operations produce no effect.)
    n++;           // 1111 1111 --> 1 0000 0000
    return n;
  };
  // Orders SortNode values by dimsize, breaking ties by index.
  // Marked inline: this is a free function defined in a header, so without it every
  // translation unit including the header would emit its own definition (ODR violation).
  inline bool operator<(const SortNode &l, const SortNode &r) { return l.dimsize < r.dimsize || (l.dimsize == r.dimsize && l.index < r.index); }
  template <typename U>
  void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
  {
@@ -125,30 +125,64 @@ namespace Grid
    std::vector<hsize_t> dim(rank);
    for(int i = 0; i < rank; i++)
      dim[i] = Dimensions[i];
-    // write to file
+    // write the entire dataset to file
    H5NS::DataSpace dataSpace(rank, dim.data());
    size_t DataSize = NumElements * sizeof(U);
    if (DataSize > dataSetThres_)
    {
-      // Make sure the chunk size is < 4GB
+      // First few prime numbers from https://oeis.org/A000040
      static const unsigned short Primes[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31,
        37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109,
        113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
        197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271 };
      constexpr int NumPrimes = sizeof( Primes ) / sizeof( Primes[0] );
      // Make sure 1) each dimension; and 2) chunk size is < 4GB
      const hsize_t MaxElements = ( sizeof( U ) == 1 ) ? 0xffffffff : 0x100000000 / sizeof( U );
      hsize_t ElementsPerChunk = 1;
      bool bTooBig = false;
-      for( unsigned int i = rank - 1; i != -1; i-- ) {
+      for( int i = rank - 1 ; i != -1 ; i-- ) {
        auto &d = dim[i];
        if( bTooBig )
-          // Chunk size is already as big as can be - remaining dimensions = 1
+          d = 1; // Chunk size is already as big as can be - remaining dimensions = 1
          dim[i] = 1;
        else {
          // If individual dimension too big, reduce by prime factors if possible
          for( int PrimeIdx = 0; d > MaxElements && PrimeIdx < NumPrimes; ) {
            if( d % Primes[PrimeIdx] )
              PrimeIdx++;
            else
              d /= Primes[PrimeIdx];
          }
          const char ErrorMsg[] = " dimension > 4GB without small prime factors. "
                                  "Hdf5IO chunk size will be inefficient.";
          if( d > MaxElements ) {
            std::cout << GridLogMessage << "Individual" << ErrorMsg << std::endl;
            hsize_t quotient = d / MaxElements;
            if( d % MaxElements )
              quotient++;
            d /= quotient;
          }
          // Now make sure overall size is not too big
-          ElementsPerChunk *= dim[i];
+          hsize_t OverflowCheck = ElementsPerChunk;
-          if( ElementsPerChunk >= MaxElements ) {
+          ElementsPerChunk *= d;
          assert( OverflowCheck == ElementsPerChunk / d && "Product of dimensions overflowed hsize_t" );
          // If product of dimensions too big, reduce by prime factors
          for( int PrimeIdx = 0; ElementsPerChunk > MaxElements && PrimeIdx < NumPrimes; ) {
            bTooBig = true;
-            hsize_t dividend  = ElementsPerChunk / MaxElements;
+            if( d % Primes[PrimeIdx] )
-            hsize_t remainder = ElementsPerChunk % MaxElements;
+              PrimeIdx++;
-            if( remainder )
+            else {
-              dividend++;
+              d /= Primes[PrimeIdx];
-            dim[i] = dim[i] / dividend;
+              ElementsPerChunk /= Primes[PrimeIdx];
            }
          }
          if( ElementsPerChunk > MaxElements ) {
            std::cout << GridLogMessage << "Product of" << ErrorMsg << std::endl;
            hsize_t quotient = ElementsPerChunk / MaxElements;
            if( ElementsPerChunk % MaxElements )
              quotient++;
            d /= quotient;
            ElementsPerChunk /= quotient;
          }
        }
      }
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -114,7 +114,7 @@ typedef Eigen::TensorFixedSize<TestScalar, Eigen::Sizes<9,4,2>, Eigen::StorageOp
 typedef std::vector<Tensor_9_4_2> aTensor_9_4_2;
 typedef Eigen::TensorFixedSize<SpinColourVector, Eigen::Sizes<6,5>> LSCTensor;
 #ifdef DEBUG
-typedef Eigen::TensorFixedSize<iMatrix<iVector<iMatrix<iVector<LorentzColourMatrix,5>,2>,7>,3>, Eigen::Sizes<2,2,11,10,9>, Eigen::StorageOptions::RowMajor> LCMTensor;
+typedef Eigen::TensorFixedSize<iMatrix<iVector<iMatrix<iVector<LorentzColourMatrix,5>,2>,7>,3>, Eigen::Sizes<2,4,11,10,9>, Eigen::StorageOptions::RowMajor> LCMTensor;
 #endif
 class PerambIOTestClass: Serializable {
@@ -132,9 +132,6 @@ public:
                                  , Tensor_9_4_2,             tensor_9_4_2
                                  , aTensor_9_4_2,            atensor_9_4_2
                                  , LSCTensor,                MyLSCTensor
 #ifdef DEBUG
                                  , LCMTensor,                MyLCMTensor
 #endif
                                  );
  PerambIOTestClass()
  : DistilParameterNames {"alpha", "beta", "gamma", "delta", "epsilon", "zeta"}
@@ -153,23 +150,19 @@ public:
    for( auto &t : atensor_9_4_2 )
      SequentialInit(t, Flag);
    SequentialInit( MyLSCTensor );
 #ifdef DEBUG
    SequentialInit( MyLCMTensor );
 #endif
  }
 };
 #define RDR_ Hdf5Reader
 #define WTR_ Hdf5Writer
 #define TensorWriteReadInnerNoInit( T ) \
-  filename = "iotest_" + std::to_string(++TestNum) + "_" #T ".h5"; \
+  filename = "iotest_" + std::to_string(++TestNum) + "_" #T + pszExtension; \
  ioTest<WTR_, RDR_, T>(filename, t, #T, #T);
 #define TensorWriteReadInner( T )  SequentialInit( t ); TensorWriteReadInnerNoInit( T )
 #define TensorWriteRead( T      ) { T t               ; TensorWriteReadInner( T ) }
 #define TensorWriteReadV(T, ... ) { T t( __VA_ARGS__ ); TensorWriteReadInner( T ) }
 #define TensorWriteReadLarge( T ) { std::unique_ptr<T> p{new T}; T &t{*p}; TensorWriteReadInnerNoInit(T) }
-void EigenHdf5IOTest(void)
+template <typename WTR_, typename RDR_>
 void EigenHdf5IOTest(const char * pszExtension)
 {
  unsigned int TestNum = 0;
  std::string filename;
@@ -300,8 +293,10 @@ int main(int argc,char **argv)
  ioTest<Hdf5Writer, Hdf5Reader>("iotest.h5", obj, "HDF5   (object)           ");
  ioTest<Hdf5Writer, Hdf5Reader>("iotest.h5", vec, "HDF5   (vector of objects)");
  std::cout << "\n==== detailed Hdf5 tensor tests (Grid::EigenIO)" << std::endl;
-  EigenHdf5IOTest();
+  EigenHdf5IOTest<Hdf5Writer, Hdf5Reader>(".h5");
 #endif
  std::cout << "\n==== detailed binary tensor tests (Grid::EigenIO)" << std::endl;
  EigenHdf5IOTest<BinaryWriter, BinaryReader>(".bin");
  std::cout << "\n==== vector flattening/reconstruction" << std::endl;
  typedef std::vector<std::vector<std::vector<double>>> vec3d;