Switched to Hdf5 format for perambulators. Ready for first test on Tesseract.

2025-10-13 20:54:43 +01:00 · 2019-04-28 17:53:42 +01:00
parent 5aca4e8670
commit adc1eaee68
5 changed files with 179 additions and 63 deletions
--- a/Hadrons/Distil.hpp
+++ b/Hadrons/Distil.hpp
@@ -258,34 +258,83 @@ inline GridCartesian * MakeLowerDimGrid( GridCartesian * gridHD )

 ******************************************************************************/

-template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size = sizeof(Scalar_)>
-class NamedTensor : public Eigen::Tensor<Scalar_, NumIndices_, Eigen::RowMajor>
+template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size_ = sizeof(Scalar_)>
+class NamedTensor : Serializable
 {
 public:
-  typedef Eigen::Tensor<Scalar_, NumIndices_, Eigen::RowMajor> ET;
-  std::array<std::string,NumIndices_> IndexNames;
+  using Scalar = Scalar_;
+  static constexpr int NumIndices = NumIndices_;
+  static constexpr uint16_t Endian_Scalar_Size = Endian_Scalar_Size_;
+  using ET = Eigen::Tensor<Scalar_, NumIndices_, Eigen::RowMajor>;
+  using Index = typename ET::Index;
+  GRID_SERIALIZABLE_CLASS_MEMBERS(NamedTensor
+                                  , ET, tensor
+                                  , std::vector<std::string>, IndexNames
+                                  );
 public:
+  inline operator ET&() { return tensor; }             // Named tensors are intended to be a superset of Eigen tensor
+  inline operator const ET&() const { return tensor; } // a const NamedTensor only exposes its tensor read-only
  template<typename... IndexTypes>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NamedTensor(std::array<std::string,NumIndices_> &IndexNames_, Eigen::Index firstDimension, IndexTypes... otherDimensions)
-  : IndexNames{IndexNames_}, ET(firstDimension, otherDimensions...)
+  inline const Scalar_& operator()(const std::array<Eigen::Index, NumIndices_> &Indices) const
+  { return tensor.operator()(Indices); }
+  inline Scalar_& operator()(const std::array<Eigen::Index, NumIndices_> &Indices)
+  { return tensor.operator()(Indices); }
+  template<typename... IndexTypes>
+  inline const Scalar_& operator()(Eigen::Index firstDimension, IndexTypes... otherDimensions) const
  {
-    // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-    assert(sizeof...(otherDimensions) + 1 == NumIndices_
-           && "NamedTensor error: dimensions in constructor != tensor rank");
+    // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
+    assert(sizeof...(otherDimensions) + 1 == NumIndices_ && "NamedTensor: dimensions != tensor rank");
+    return tensor.operator()(std::array<Eigen::Index, NumIndices_>{{firstDimension, otherDimensions...}});
+  }
+  template<typename... IndexTypes>
+  inline Scalar_& operator()(Eigen::Index firstDimension, IndexTypes... otherDimensions)
+  {
+    // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
+    assert(sizeof...(otherDimensions) + 1 == NumIndices_ && "NamedTensor: dimensions != tensor rank");
+    return tensor.operator()(std::array<Eigen::Index, NumIndices_>{{firstDimension, otherDimensions...}});
  }

+  // Construct a named tensor explicitly specifying size of each dimension.
+  // IndexNames_ holds one name per index and is only read, so it is taken by const reference.
+  // The names are copied straight into the serialisable vector member by the member initialiser.
+  template<typename... IndexTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NamedTensor(const std::array<std::string,NumIndices_> &IndexNames_, Eigen::Index firstDimension, IndexTypes... otherDimensions)
+  : tensor(firstDimension, otherDimensions...), IndexNames(IndexNames_.begin(), IndexNames_.end())
+  {
+    // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
+    assert(sizeof...(otherDimensions) + 1 == NumIndices_ && "NamedTensor: dimensions != tensor rank");
+  }
+
+  // Default constructor (assumes tensor will be loaded from file).
+  // Parentheses pick the vector size constructor: NumIndices_ empty names, which read() accepts as wildcards.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NamedTensor() : IndexNames(NumIndices_) {}
+  
+  // Construct a named tensor without specifying size of each dimension (because it will be loaded from file).
+  // The names given here are what read() later checks the loaded names against.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NamedTensor(const std::array<std::string,NumIndices_> &IndexNames_)
+  : IndexNames(IndexNames_.begin(), IndexNames_.end())
+  {
+  }
+  
  // Share data for timeslices we calculated with other nodes
  inline void SliceShare( GridCartesian * gridLowDim, GridCartesian * gridHighDim ) {
-    Grid::SliceShare( gridLowDim, gridHighDim, this->data(), (int) (this->size() * sizeof(Scalar_)));
+    Grid::SliceShare( gridLowDim, gridHighDim, tensor.data(), (int) (tensor.size() * sizeof(Scalar_)));
  }

  // load and save - not virtual - probably all changes
-  inline void load(const std::string filename);
-  inline void save(const std::string filename) const;
-  inline void ReadBinary(const std::string filename);
-  inline void WriteBinary(const std::string filename);
+  template<typename Reader> inline void read (Reader &r, const char * pszTag = nullptr);
+  template<typename Writer> inline void write(Writer &w, const char * pszTag = nullptr) const;
+  template<typename Reader> inline void read (const char * filename, const char * pszTag = nullptr);
+  template<typename Writer> inline void write(const char * filename, const char * pszTag = nullptr) const;
+  EIGEN_DEPRECATED inline void ReadBinary (const std::string filename); // To be removed
+  EIGEN_DEPRECATED inline void WriteBinary(const std::string filename); // To be removed
 };

+// Is this a named tensor: true for NamedTensor itself (1st specialisation) or a class derived from one (2nd; excludes the exact type so the two never overlap)
+template<typename T, typename V = void> struct is_named_tensor : public std::false_type {};
+template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size_> struct is_named_tensor<NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size_>> : public std::true_type {};
+template<typename T> struct is_named_tensor<T, typename std::enable_if<std::is_base_of<NamedTensor<typename T::Scalar, T::NumIndices, T::Endian_Scalar_Size>, T>::value && !std::is_same<NamedTensor<typename T::Scalar, T::NumIndices, T::Endian_Scalar_Size>, T>::value>::type> : public std::true_type {};
+
 /******************************************************************************
 Save NamedTensor binary format (NB: On-disk format is Big Endian)
 Assumes the Scalar_ objects are contiguous (no padding)
@@ -301,7 +350,7 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::WriteBinary(const st
  assert((sizeof(Scalar_) % Endian_Scalar_Size) == 0 && "NamedTensor error: Scalar_ is not composed of Endian_Scalar_Size" );
  // Size of the data (in bytes)
  const uint32_t Scalar_Size{sizeof(Scalar_)};
-  const auto NumElements{this->size()};
+  const auto NumElements{tensor.size()};
  const std::streamsize TotalDataSize{static_cast<std::streamsize>(NumElements * Scalar_Size)};
  uint64_t u64 = htobe64(static_cast<uint64_t>(TotalDataSize));
  w.write(reinterpret_cast<const char *>(&u64), sizeof(u64));
@@ -313,14 +362,14 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::WriteBinary(const st
  w.write(reinterpret_cast<const char *>(&u16), sizeof(u16));
  // number of dimensions which aren't 1
  u16 = static_cast<uint16_t>(this->NumIndices);
-  for( auto dim : this->dimensions() )
+  for( auto dim : tensor.dimensions() )
    if( dim == 1 )
      u16--;
  u16 = htobe16( u16 );
  w.write(reinterpret_cast<const char *>(&u16), sizeof(u16));
  // dimensions together with names
  int d = 0;
-  for( auto dim : this->dimensions() ) {
+  for( auto dim : tensor.dimensions() ) {
    if( dim != 1 ) {
      // size of this dimension
      u16 = htobe16( static_cast<uint16_t>( dim ) );
@@ -334,7 +383,7 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::WriteBinary(const st
    d++;
  }
  // Actual data
-  char * const pStart{reinterpret_cast<char *>(this->data())};
+  char * const pStart{reinterpret_cast<char *>(tensor.data())};
  // Swap to network byte order in place (alternative is to copy memory - still slow)
  void * const pEnd{pStart + TotalDataSize};
  if(Endian_Scalar_Size == 8)
@@ -359,9 +408,9 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::WriteBinary(const st
      * p = be16toh( * p );
  // checksum
 #ifdef USE_IPP
-  u32 = htobe32(GridChecksum::crc32c(this->data(), TotalDataSize));
+  u32 = htobe32(GridChecksum::crc32c(tensor.data(), TotalDataSize));
 #else
-  u32 = htobe32(GridChecksum::crc32(this->data(), TotalDataSize));
+  u32 = htobe32(GridChecksum::crc32(tensor.data(), TotalDataSize));
 #endif
  w.write(reinterpret_cast<const char *>(&u32), sizeof(u32));
 }
@@ -381,7 +430,7 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::ReadBinary(const std
  assert((sizeof(Scalar_) % Endian_Scalar_Size) == 0 && "NamedTensor error: Scalar_ is not composed of Endian_Scalar_Size" );
  // Size of the data in bytes
  const uint32_t Scalar_Size{sizeof(Scalar_)};
-  const auto NumElements{this->size()};
+  const auto NumElements{tensor.size()};
  const std::streamsize TotalDataSize{static_cast<std::streamsize>(NumElements * Scalar_Size)};
  uint64_t u64;
  r.read(reinterpret_cast<char *>(&u64), sizeof(u64));
@@ -397,13 +446,13 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::ReadBinary(const std
  // number of dimensions which aren't 1
  r.read(reinterpret_cast<char *>(&u16), sizeof(u16));
  u16 = be16toh( u16 );
-  for( auto dim : this->dimensions() )
+  for( auto dim : tensor.dimensions() )
    if( dim == 1 )
      u16++;
  assert( this->NumIndices == u16 && "NamedTensor error: number of dimensions which aren't 1" );
  // dimensions together with names
  int d = 0;
-  for( auto dim : this->dimensions() ) {
+  for( auto dim : tensor.dimensions() ) {
    if( dim != 1 ) {
      // size of dimension
      r.read(reinterpret_cast<char *>(&u16), sizeof(u16));
@@ -420,7 +469,7 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::ReadBinary(const std
    d++;
  }
  // Actual data
-  char * const pStart{reinterpret_cast<char *>(this->data())};
+  char * const pStart{reinterpret_cast<char *>(tensor.data())};
  void * const pEnd{pStart + TotalDataSize};
  r.read(pStart,TotalDataSize);
  // Swap back from network byte order
@@ -437,44 +486,59 @@ void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::ReadBinary(const std
  r.read(reinterpret_cast<char *>(&u32), sizeof(u32));
  u32 = be32toh( u32 );
 #ifdef USE_IPP
-  u32 -= GridChecksum::crc32c(this->data(), TotalDataSize);
+  u32 -= GridChecksum::crc32c(tensor.data(), TotalDataSize);
 #else
-  u32 -= GridChecksum::crc32(this->data(), TotalDataSize);
+  u32 -= GridChecksum::crc32(tensor.data(), TotalDataSize);
 #endif
  assert( u32 == 0 && "NamedTensor error: Perambulator checksum invalid");
 }

 /******************************************************************************
- Save NamedTensor Hdf5 format
+ Write NamedTensor
 ******************************************************************************/

 template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size>
-void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::save(const std::string filename) const {
+template<typename Writer>
+void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::write(Writer &w, const char * pszTag)const{
+  if( pszTag == nullptr )
+    pszTag = "tensor";
+  write(w, pszTag, *this);
+}
+
+template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size>
+template<typename Writer>
+void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::write(const char * filename, const char * pszTag)const{
  LOG(Message) << "Writing NamedTensor to \"" << filename << "\"" << std::endl;
-#ifndef HAVE_HDF5
-  LOG(Message) << "Error: I/O for NamedTensor requires HDF5" << std::endl;
-#else
-  Hdf5Writer w(filename);
-  //w << this->NumIndices << this->dimensions() << this->IndexNames;
-#endif
+  Writer w(filename);
+  write(w, pszTag);
 }

 /******************************************************************************
- Load NamedTensor Hdf5 format
+ Read NamedTensor
 ******************************************************************************/

 template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size>
-void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::load(const std::string filename) {
+template<typename Reader>
+void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::read(Reader &r, const char * pszTag) {
+  // Read under pszTag ("tensor" if null), then check against names / sizes set beforehand (empty name or 0 = any)
+  if( pszTag == nullptr ) pszTag = "tensor";
+  std::vector<std::string> OldIndexNames{std::move(IndexNames)};
+  typename ET::Dimensions OldDimensions{tensor.dimensions()};
+  read(r, pszTag, *this);
+  const typename ET::Dimensions & NewDimensions{tensor.dimensions()};
+  assert(OldIndexNames.size() == static_cast<std::size_t>(NumIndices_) && IndexNames.size() == static_cast<std::size_t>(NumIndices_) && "NamedTensor::read number of index names");
+  for( int i=0; i < NumIndices_; i++ ) { // parentheses make the intended grouping of || and && explicit
+    assert((OldDimensions[i] == 0 || OldDimensions[i] == NewDimensions[i]) && "NamedTensor::read dimension size");
+    assert((OldIndexNames[i].size() == 0 || OldIndexNames[i] == IndexNames[i]) && "NamedTensor::read dimension name");
+  }
+}
+
+template<typename Scalar_, int NumIndices_, uint16_t Endian_Scalar_Size>
+template<typename Reader>
+void NamedTensor<Scalar_, NumIndices_, Endian_Scalar_Size>::read(const char * filename, const char * pszTag) {
  LOG(Message) << "Reading NamedTensor from \"" << filename << "\"" << std::endl;
-#ifndef HAVE_HDF5
-  LOG(Message) << "Error: I/O for NamedTensor requires HDF5" << std::endl;
-#else
-  Hdf5Reader r(filename);
-  typename ET::Dimensions d;
-  std::array<std::string,NumIndices_> n;
-  //r >> this->NumIndices >> d >> n;
-  //this->IndexNames = n;
-#endif
+  Reader r(filename);
+  read(r, pszTag);
 }

 /******************************************************************************
--- a/Hadrons/Modules/MDistil/PerambFromSolve.hpp
+++ b/Hadrons/Modules/MDistil/PerambFromSolve.hpp
@@ -238,9 +238,11 @@ void TPerambFromSolve<FImpl>::execute(void)
      }
    }

-    if(PerambFileName.length())
-        perambulator.WriteBinary(PerambFileName + "." + std::to_string(vm().getTrajectory()));
- 
+  if(PerambFileName.length()) {
+    std::string sPerambName{PerambFileName + "." + std::to_string(vm().getTrajectory())};
+    //perambulator.WriteBinary(sPerambName);
+    perambulator.template write<Hdf5Writer>((sPerambName + ".h5").c_str(), sPerambName.c_str());
+  }
 }

 END_MODULE_NAMESPACE
--- a/Hadrons/Modules/MDistil/Perambulator.hpp
+++ b/Hadrons/Modules/MDistil/Perambulator.hpp
@@ -286,8 +286,11 @@ void TPerambulator<FImpl>::execute(void)
    std::cout <<  "perambulator done" << std::endl;
    perambulator.SliceShare( grid3d, grid4d );

-    if(PerambFileName.length())
-        perambulator.WriteBinary(PerambFileName + "." + std::to_string(vm().getTrajectory()));
+  if(PerambFileName.length()) {
+    std::string sPerambName{PerambFileName + "." + std::to_string(vm().getTrajectory())};
+    //perambulator.WriteBinary(sPerambName);
+    perambulator.template write<Hdf5Writer>((sPerambName + ".h5").c_str(), sPerambName.c_str());
+  }
 }

 END_MODULE_NAMESPACE
--- a/Hadrons/Modules/MIO/LoadPerambulator.hpp
+++ b/Hadrons/Modules/MIO/LoadPerambulator.hpp
@@ -118,13 +118,11 @@ void TLoadPerambulator<FImpl>::setup(void)
 template <typename FImpl>
 void TLoadPerambulator<FImpl>::execute(void)
 {
-    auto        &perambulator = envGet(MDistil::Perambulator<SpinVector COMMA 6 COMMA sizeof(Real)>,
-		                                           getName());
-
-	const std::string &PerambFileName{par().PerambFileName + "." + std::to_string(vm().getTrajectory())};
-        std::cout << "reading perambulator from file " << PerambFileName << std::endl;
-        perambulator.ReadBinary(PerambFileName);
-                                                          
+  auto &perambulator = envGet(MDistil::Perambulator<SpinVector COMMA 6 COMMA sizeof(Real)>, getName());
+  const std::string sPerambName{par().PerambFileName + "." + std::to_string(vm().getTrajectory())};
+  const std::string PerambFileName{sPerambName + ".h5"};
+  std::cout << "reading perambulator from file " << PerambFileName << std::endl;
+  perambulator.template read<Hdf5Reader>(PerambFileName.c_str(), sPerambName.c_str());
 }

 END_MODULE_NAMESPACE
--- a/tests/hadrons/Test_distil.cc
+++ b/tests/hadrons/Test_distil.cc
@@ -647,7 +647,8 @@ bool bNumber( int &ri, const char * & pstr, bool bGobbleWhiteSpace = true )
 typedef Grid::Hadrons::MDistil::NamedTensor<Complex,3,sizeof(Real)> MyTensor;

 template<typename T>
-void DebugShowTensor(T &x, const char * n)
+typename std::enable_if<Grid::EigenIO::is_tensor<T>::value && !Grid::Hadrons::MDistil::is_named_tensor<T>::value>::type
+DebugShowTensor(T &x, const char * n, std::string * pIndexNames=nullptr)
 {
  const MyTensor::Index s{x.size()};
  std::cout << n << ".size() = " << s << std::endl;
@@ -662,7 +663,10 @@ void DebugShowTensor(T &x, const char * n)
  MyTensor::Index SizeCalculated{1};
  std::cout << "Dimensions again";
  for(int i=0 ; i < x.NumDimensions ; i++ ) {
-    std::cout << " : [" << i << /*", " << x.IndexNames[i] << */"]=" << x.dimension(i);
+    std::cout << " : [" << i;
+    if( pIndexNames )
+      std::cout << ", " << pIndexNames[i];
+    std::cout << "]=" << x.dimension(i);
    SizeCalculated *= d[i];
  }
  std::cout << std::endl;
@@ -686,6 +690,13 @@ void DebugShowTensor(T &x, const char * n)
  std::cout << std::endl;
 }

+template<typename T>
+typename std::enable_if<Grid::Hadrons::MDistil::is_named_tensor<T>::value>::type
+DebugShowTensor(T &x, const char * n)
+{ // NamedTensor overload: show the wrapped Eigen tensor, labelling indices only when there is one name per index
+  DebugShowTensor( x.tensor, n, x.IndexNames.size() == static_cast<std::size_t>(T::NumIndices) ? x.IndexNames.data() : nullptr );
+}
+
 // Test whether typedef and underlying types are the same

 void DebugTestTypeEqualities(void)
@@ -757,21 +768,59 @@ bool DebugEigenTest()
  MyTensor x(as, 2,1,4);
  DebugShowTensor(x, "x");
  x.WriteBinary(pszTestFileName);
-  DebugShowTensor(x, "x");
  // Test initialisation of an array of strings
  for( auto a : as )
    std::cout << a << std::endl;
-  Grid::Hadrons::MDistil::Peramb<Complex,3,sizeof(Real)> p{as,2,7,2};
+  Grid::Hadrons::MDistil::Perambulator<Complex,3,sizeof(Real)> p{as,2,7,2};
  DebugShowTensor(p, "p");
  std::cout << "p.IndexNames follow" << std::endl;
  for( auto a : p.IndexNames )
    std::cout << a << std::endl;
+
  // Now see whether we can read a tensor back
  std::array<std::string,3> Names2={"Alpha", "Gamma", "Delta"};
  MyTensor y(Names2, 2,4,1);
  y.ReadBinary(pszTestFileName);
  DebugShowTensor(y, "y");

+  // Now see whether we can read a tensor back from an hdf5 file
+  const char * pszH5Name = "test.h5";
+  y.write<Hdf5Writer>(pszH5Name);
+  {
+    MyTensor z;
+    const char * pszName = "z1";
+    DebugShowTensor(z, pszName);
+    z.read<Hdf5Reader>(pszH5Name);
+    DebugShowTensor(z, pszName);
+  }
+  {
+    MyTensor z(Names2,2,0,0);
+    const char * pszName = "z2";
+    DebugShowTensor(z, pszName);
+    z.read<Hdf5Reader>(pszH5Name);
+    DebugShowTensor(z, pszName);
+  }
+  if((0)) // The following tests would fail
+  {
+    MyTensor z(Names2,2,0,78);
+    //std::array<std::string,3> NamesBad={"Alpha", "Gamma", "Kilo"};
+    //MyTensor z(NamesBad);
+    const char * pszName = "zFail";
+    DebugShowTensor(z, pszName);
+    z.read<Hdf5Reader>(pszH5Name);
+    DebugShowTensor(z, pszName);
+  }
+  // Now see whether we can read a tensor back from an xml file
+  const char * pszXmlName = "test.xml";
+  y.write<XmlWriter>(pszXmlName);
+  {
+    MyTensor z;
+    const char * pszName = "xml1";
+    DebugShowTensor(z, pszName);
+    z.read<XmlReader>(pszXmlName);
+    DebugShowTensor(z, pszName);
+  }
+
  // Testing whether typedef produces the same type - yes it does

  DebugTestTypeEqualities();
@@ -954,8 +1003,8 @@ bool DebugGridTensorTest( void )
  for( auto x : toc7 ) std::cout << " [" << i++ << "]=" << x;
  std::cout << std::endl;

-  t2 o2;
-  auto a2 = TensorRemove(o2);
+  //t2 o2;
+  //auto a2 = TensorRemove(o2);
  //t3 o3;
  //t4 o4;
  //auto a3 = TensorRemove(o3);