From 63c97db41468b2b599a6650b9d9d809eabf4e98c Mon Sep 17 00:00:00 2001
From: Michael Marshall <michael.marshall@ed.ac.uk>
Date: Tue, 19 Feb 2019 13:29:08 +0000
Subject: [PATCH 1/2] Prior to rationalising 2 versions of BaseIO::write
 (scalar and vector)

---
 Grid/serialisation/BaseIO.h    |  46 +++++---
 tests/IO/Test_serialisation.cc | 192 +++++++++++++--------------------
 2 files changed, 105 insertions(+), 133 deletions(-)
diff --git a/Grid/serialisation/BaseIO.h b/Grid/serialisation/BaseIO.h
index 6adb87d3..771cc628 100644
--- a/Grid/serialisation/BaseIO.h
+++ b/Grid/serialisation/BaseIO.h
@@ -36,13 +36,19 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
 #include <Grid/Eigen/unsupported/CXX11/Tensor>
 
 namespace Grid {
+  // TODO Support Grid::complex from GPU port
+  template<typename T> using Grid_complex = std::complex<T>;
+
+  // Returns original type, except for Grid_complex, where it returns the underlying type
+  template<typename T> struct RealType { using type = T; };
+  template<typename T> struct RealType<Grid_complex<T>> { using type = T; };
+
   namespace EigenIO {
     template<typename T> struct is_complex : public std::false_type {};
-    template<typename T> struct is_complex<std::complex<T>>
+    template<typename T> struct is_complex<Grid_complex<T>>
         : std::integral_constant<bool, std::is_arithmetic<T>::value> {};
 
     // Eigen tensors can be composed of arithmetic scalar and complex types
-    // TODO Support Grid::comples from GPU port
     template<typename T> struct is_scalar : std::integral_constant<bool,
                             std::is_arithmetic<T>::value || is_complex<T>::value> {};
 
@@ -88,11 +94,12 @@ namespace Grid {
     template <typename T, typename C = void> struct Traits {}; // C needed for specialisation
     // This defines the bottom level - i.e. it's a description of the underlying scalar
     template <typename T> struct Traits<T, typename std::enable_if<is_scalar<T>::value, void>::type> {
+      using scalar_type = T; // Type of the underlying scalar
+      using scalar_real = typename RealType<scalar_type>::type; // real type underlying scalar_type
       static constexpr unsigned int depth = 0;  // How many levels of Grid Tensor there are (TensorLevel)
       static constexpr unsigned int rank = 0;   // The rank of the grid tensor (i.e. how many indices used)
       static constexpr unsigned int rank_non_trivial = 0; // As per rank, but excludes those of dimension 1
       static constexpr unsigned int count = 1;  // total number of elements (i.e. product of dimensions)
-      using scalar_type = T;                              // Type of the underlying scalar
       static constexpr std::size_t scalar_size = sizeof(T);    // Size of the underlying scalar in bytes
       static constexpr std::size_t size = scalar_size * count; // total size of elements in bytes
       static constexpr std::size_t Dimension(unsigned int dim) { return 0; } // Dimension size
@@ -114,11 +121,12 @@ namespace Grid {
       //      count  = 48
     };
     template <typename T> struct Traits<iScalar<T>> {
+      using scalar_type = typename Traits<T>::scalar_type;
+      using scalar_real = typename RealType<scalar_type>::type;
       static constexpr unsigned int depth = 1 + Traits<T>::depth;
       static constexpr unsigned int rank = 0 + Traits<T>::rank;
       static constexpr unsigned int rank_non_trivial = 0 + Traits<T>::rank_non_trivial;
       static constexpr unsigned int count = 1 * Traits<T>::count;
-      using scalar_type = typename Traits<T>::scalar_type;
       static constexpr std::size_t scalar_size = Traits<T>::scalar_size;
       static constexpr std::size_t size = scalar_size * count;
       static constexpr std::size_t Dimension(unsigned int dim) {
@@ -127,11 +135,12 @@ namespace Grid {
         return Traits<T>::DimensionNT(dim); }
     };
     template <typename T, int N> struct Traits<iVector<T, N>> {
+      using scalar_type = typename Traits<T>::scalar_type;
+      using scalar_real = typename RealType<scalar_type>::type;
       static constexpr unsigned int depth = 1 + Traits<T>::depth;
       static constexpr unsigned int rank = 1 + Traits<T>::rank;
       static constexpr unsigned int rank_non_trivial = (N>1 ? 1 : 0) + Traits<T>::rank_non_trivial;
       static constexpr unsigned int count = N * Traits<T>::count;
-      using scalar_type = typename Traits<T>::scalar_type;
       static constexpr std::size_t scalar_size = Traits<T>::scalar_size;
       static constexpr std::size_t size = scalar_size * count;
       static constexpr std::size_t Dimension(unsigned int dim) {
@@ -141,11 +150,12 @@ namespace Grid {
       }
     };
     template <typename T, int N> struct Traits<iMatrix<T, N>> {
+      using scalar_type = typename Traits<T>::scalar_type;
+      using scalar_real = typename RealType<scalar_type>::type;
       static constexpr unsigned int depth = 1 + Traits<T>::depth;
       static constexpr unsigned int rank = 2 + Traits<T>::rank;
       static constexpr unsigned int rank_non_trivial = (N>1 ? 2 : 0) + Traits<T>::rank_non_trivial;
       static constexpr unsigned int count = N * N * Traits<T>::count;
-      using scalar_type = typename Traits<T>::scalar_type;
       static constexpr std::size_t scalar_size = Traits<T>::scalar_size;
       static constexpr std::size_t size = scalar_size * count;
       static constexpr std::size_t Dimension(unsigned int dim) {
@@ -230,16 +240,22 @@ namespace Grid {
     }
   }
 
-  // Used for sequential initialisations
-  template<typename T> constexpr T Flag = 1;
-  template<typename T> constexpr std::complex<T> Flag<std::complex<T>> {1, -1};
-  // Returns the type of the real part of an arithmetic type
-  template<typename T> struct RealType { using type = T; };
-  template<typename T> struct RealType<std::complex<T>> { using type = T; };
+  // Sequential initialisation of tensors
+  // Would have preferred to define template variables for this, but that's c++ 17
+  template <typename ETensor>
+  typename std::enable_if<EigenIO::is_tensor<ETensor>::value && !EigenIO::is_complex<typename EigenIO::Traits<typename ETensor::Scalar>::scalar_type>::value, void>::type
+  SequentialInit( ETensor &ET, typename EigenIO::Traits<typename ETensor::Scalar>::scalar_type Inc = 1 )
+  {
+    using Traits = EigenIO::Traits<typename ETensor::Scalar>;
+    using scalar_type = typename Traits::scalar_type;
+    for_all( ET, [&](scalar_type &c, typename ETensor::Index n, const std::array<size_t, ETensor::NumIndices + Traits::rank_non_trivial> &Dims ) {
+      c = Inc * static_cast<scalar_type>(n);
+    } );
+  }
 
   template <typename ETensor>
-  typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
-  Sequential_Init( ETensor &ET, typename EigenIO::Traits<typename ETensor::Scalar>::scalar_type Inc = Flag<typename EigenIO::Traits<typename ETensor::Scalar>::scalar_type> )
+  typename std::enable_if<EigenIO::is_tensor<ETensor>::value && EigenIO::is_complex<typename EigenIO::Traits<typename ETensor::Scalar>::scalar_type>::value, void>::type
+  SequentialInit( ETensor &ET, typename EigenIO::Traits<typename ETensor::Scalar>::scalar_type Inc={1,-1})
   {
     using Traits = EigenIO::Traits<typename ETensor::Scalar>;
     using scalar_type = typename Traits::scalar_type;
@@ -247,7 +263,7 @@ namespace Grid {
       c = Inc * static_cast<typename RealType<scalar_type>::type>(n);
     } );
   }
-
+  
   // Helper to dump a tensor
 #ifdef DEBUG
 #define dump_tensor(args...) dump_tensor_func(args)
diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc
index ae61eebe..342fc214 100644
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -82,7 +82,7 @@ bool     b   = false;
 template <typename W, typename R, typename O>
 void ioTest(const std::string &filename, const O &object, const std::string &name)
 {
-  std::cout << name << " IO test: writing ...";
+  std::cout << "IO test: " << name << " -> " << filename << " ...";
   // writer needs to be destroyed so that writing physically happens
   {
     W writer(filename);
@@ -92,149 +92,105 @@ void ioTest(const std::string &filename, const O &object, const std::string &nam
 
   std::cout << " done. reading...";
   R    reader(filename);
-  O    buf;
+  std::unique_ptr<O> buf( new O ); // In case object too big for stack
 
-  read(reader, "testobject", buf);
-  bool good = Serializable::CompareMember(object, buf);
+  read(reader, "testobject", *buf);
+  bool good = Serializable::CompareMember(object, *buf);
   if (!good) {
     std::cout << " failure!" << std::endl;
     if constexpr (EigenIO::is_tensor<O>::value)
-      dump_tensor(buf,"???");
+      dump_tensor(*buf,"???");
     exit(EXIT_FAILURE);
   }
   std::cout << " done." << std::endl;
 }
 
 #ifdef HAVE_HDF5
-typedef Eigen::Tensor<int, 5> ShortRank5Tensor;
-//typedef int TestScalar;
 typedef std::complex<double> TestScalar;
-typedef Eigen::Tensor<TestScalar, 3, Eigen::StorageOptions::RowMajor> TestTensor;
-typedef Eigen::TensorFixedSize<TestScalar, Eigen::Sizes<9,4,2>, Eigen::StorageOptions::RowMajor> TestTensorFixed;
-typedef std::vector<TestTensorFixed> aTestTensorFixed;
-typedef Eigen::TensorFixedSize<SpinColourVector, Eigen::Sizes<11,3,2>, Eigen::StorageOptions::RowMajor> LSCTensor;
-typedef Eigen::TensorFixedSize<LorentzColourMatrix, Eigen::Sizes<5,7,2>, Eigen::StorageOptions::RowMajor> LCMTensor;
-// From Test_serialisation.cc
+using TensorSingle = Eigen::TensorFixedSize<int, Eigen::Sizes<1>>;
+using TensorSimple = Eigen::Tensor<iMatrix<TestScalar,1>, 6>;
+typedef Eigen::Tensor<unsigned short, 5> TensorRank5UShort;
+typedef Eigen::Tensor<int, 5, Eigen::StorageOptions::RowMajor> TensorRank5IntAlt;
+typedef Eigen::Tensor<TestScalar, 3, Eigen::StorageOptions::RowMajor> TensorRank3;
+typedef Eigen::TensorFixedSize<TestScalar, Eigen::Sizes<9,4,2>, Eigen::StorageOptions::RowMajor> Tensor_9_4_2;
+typedef std::vector<Tensor_9_4_2> aTensor_9_4_2;
+typedef Eigen::TensorFixedSize<SpinColourVector, Eigen::Sizes<6,5>> LSCTensor;
+#ifdef DEBUG
+typedef Eigen::TensorFixedSize<iMatrix<iVector<iMatrix<iVector<LorentzColourMatrix,5>,2>,7>,3>, Eigen::Sizes<2,2,11,10,9>, Eigen::StorageOptions::RowMajor> LCMTensor;
+#endif
+
 class PerambIOTestClass: Serializable {
 public:
   using PerambTensor = Eigen::Tensor<SpinColourVector, 6, Eigen::StorageOptions::RowMajor>;
   GRID_SERIALIZABLE_CLASS_MEMBERS(PerambIOTestClass
-                                  , ShortRank5Tensor,         shortRank5Tensor
-                                  , PerambTensor,             Perambulator
+                                  , SpinColourVector, spinColourVector
+                                  , SpinColourMatrix, spinColourMatrix
                                   , std::vector<std::string>, DistilParameterNames
                                   , std::vector<int>,         DistilParameterValues
+                                  , PerambTensor,             Perambulator
                                   , PerambTensor,             Perambulator2
-                                  , SpinColourVector, scv
-                                  , SpinColourMatrix, scm
-                                  //, TestTensor,       Critter
-                                  //, TestTensorFixed,  FixedCritter
-                                  //, aTestTensorFixed, aFixedCritter
-                                  //, LSCTensor, MyLSCTensor
-                                  //, LCMTensor, MyLCMTensor
+                                  , TensorRank5UShort,        tensorRank5UShort
+                                  , TensorRank3,              tensorRank3
+                                  , Tensor_9_4_2,             tensor_9_4_2
+                                  , aTensor_9_4_2,            atensor_9_4_2
+                                  , LSCTensor,                MyLSCTensor
+#ifdef DEBUG
+                                  , LCMTensor,                MyLCMTensor
+#endif
                                   );
-  PerambIOTestClass() : Perambulator(2,3,1,4,5,1), Perambulator2(7,1,6,1,5,1),
-                        DistilParameterNames {"alpha", "beta", "gamma", "delta", "epsilon", "what's f?"},
-                        DistilParameterValues{2,3,1,4,5,1}
-                        , shortRank5Tensor{5,4,3,2,1}
-                        //, Critter(7,3,2)//, aFixedCritter(3)
+  PerambIOTestClass()
+  : DistilParameterNames {"alpha", "beta", "gamma", "delta", "epsilon", "zeta"}
+  , DistilParameterValues{2,3,1,4,5,1}
+  , Perambulator(2,3,1,4,5,1)
+  , Perambulator2(7,1,6,1,5,1)
+  , tensorRank5UShort{5,4,3,2,1}
+  , tensorRank3(7,3,2)
   {
-    Sequential_Init(Perambulator);
-    Sequential_Init(Perambulator2, {-3.1415927,7});
-    Sequential_Init(shortRank5Tensor);
+    Grid_complex<double> Flag{1,-3.1415927};
+    SequentialInit(Perambulator,  Flag);
+    SequentialInit(Perambulator2, Flag);
+    SequentialInit(tensorRank5UShort);
+    SequentialInit(tensorRank3, Flag);
+    SequentialInit(tensor_9_4_2, Flag);
+    for( auto &t : atensor_9_4_2 )
+      SequentialInit(t, Flag);
+    SequentialInit( MyLSCTensor );
+#ifdef DEBUG
+    SequentialInit( MyLCMTensor );
+#endif
   }
 };
 
-void EigenHdf5IOTest(void) {
-  SpinColourVector scv, scv2;
-  scv2 = scv;
-  ioTest<Hdf5Writer, Hdf5Reader, SpinColourVector>("iotest_vector.h5", scv, "SpinColourVector");
-  SpinColourMatrix scm;
-  ioTest<Hdf5Writer, Hdf5Reader, SpinColourMatrix>("iotest_matrix.h5", scm, "SpinColourMatrix");
+#define TensorWriteReadInnerNoInit( T ) \
+  ioTest<Hdf5Writer, Hdf5Reader, T>("iotest_"s + std::to_string(++TestNum) + "_" #T ".h5", t, #T);
+#define TensorWriteReadInner( T )  SequentialInit( t ); TensorWriteReadInnerNoInit( T )
+#define TensorWriteRead( T      ) { T t               ; TensorWriteReadInner( T ) }
+#define TensorWriteReadV(T, ... ) { T t( __VA_ARGS__ ); TensorWriteReadInner( T ) }
+#define TensorWriteReadLarge( T ) { std::unique_ptr<T> p{new T}; T &t{*p}; TensorWriteReadInnerNoInit(T) }
 
-  constexpr TestScalar Inc{1,-1};
+void EigenHdf5IOTest(void)
+{
+  unsigned int TestNum = 0;
+  using namespace std::string_literals;
+  TensorWriteRead( TensorSingle )
+  TensorWriteReadV( TensorSimple, 1, 1, 1, 1, 1, 1 )
+  TensorWriteReadV( TensorRank3, 6, 3, 2 )
+  TensorWriteRead ( Tensor_9_4_2 )
+  TensorWriteRead ( LSCTensor )
+  TensorWriteReadLarge( PerambIOTestClass )
+#ifdef DEBUG
+  std::cout << "sizeof( LCMTensor ) = " << sizeof( LCMTensor ) / 1024 / 1024 << " MB" << std::endl;
+  TensorWriteReadLarge ( LCMTensor )
+  // Also write > 4GB of complex numbers (I suspect this will fail inside Hdf5)
   {
-    using TestTensorSingle = Eigen::TensorFixedSize<int, Eigen::Sizes<1>>;
-    TestTensorSingle ts;
-    ts(0) = 7; // lucky
-    ioTest<Hdf5Writer, Hdf5Reader, TestTensorSingle>("iotest_single.h5", ts, "Tensor_single");
+    static constexpr size_t Num = 0x11000000;
+    std::cout << "Stress test: " << Num * sizeof( Grid_complex<double> ) / 1024 / 1024
+              << " MB array of complex<double>" << std::endl;
+    using Stress = std::vector<Grid_complex<double>>;
+    Stress t (Num);
+    TensorWriteReadInnerNoInit( Stress );
   }
-
-  {
-    using TestTensorSimple = Eigen::Tensor<iMatrix<TestScalar,1>, 6>;
-    TestTensorSimple ts(1,1,1,1,1,1);
-    ts(0,0,0,0,0,0) = Inc * 3.1415927;
-    ioTest<Hdf5Writer, Hdf5Reader, TestTensorSimple>("iotest_simple.h5", ts, "Tensor_simple");
-  }
-
-  TestTensor t(6,3,2);
-  TestScalar Val{Inc};
-  for( int i = 0 ; i < t.dimension(0) ; i++)
-    for( int j = 0 ; j < t.dimension(1) ; j++)
-      for( int k = 0 ; k < t.dimension(2) ; k++) {
-        t(i,j,k) = Val;
-        Val += Inc;
-      }
-  ioTest<Hdf5Writer, Hdf5Reader, TestTensor>("iotest_tensor.h5", t, "eigen_tensor_instance_name");
-  //dump_tensor(t, "t");
-
-  // Now serialise a fixed size tensor
-  using FixedTensor = Eigen::TensorFixedSize<TestScalar, Eigen::Sizes<8,4,3>>;
-  FixedTensor tf;
-  Val = Inc;
-  for( int i = 0 ; i < tf.dimension(0) ; i++)
-    for( int j = 0 ; j < tf.dimension(1) ; j++)
-      for( int k = 0 ; k < tf.dimension(2) ; k++) {
-        tf(i,j,k) = Val;
-        Val += Inc;
-      }
-  ioTest<Hdf5Writer, Hdf5Reader, FixedTensor>("iotest_tensor_fixed.h5", tf, "eigen_tensor_fixed_name");
-  //dump_tensor(tf, "tf");
-
-  PerambIOTestClass o;
-  //dump_tensor(o.Perambulator, "Perambulator" );
-  dump_tensor(o.shortRank5Tensor, "shortRank5Tensor");
-  /*for_all( o.FixedCritter, [&](TestScalar &c, float f, const std::array<size_t,TestTensorFixed::NumIndices> &Dims ){
-    c = TestScalar{f,-f};
-    //std::cout << " a(" << Dims[0] << "," << Dims[1] << "," << Dims[2] << ")=" << c;
-  } );
-  for( auto &z : o.aFixedCritter )
-    for_all( z, [&](TestScalar &c, float f, const std::array<size_t,TestTensorFixed::NumIndices> &Dims ){
-      c = TestScalar{f,-f};
-      //std::cout << " a(" << Dims[0] << "," << Dims[1] << "," << Dims[2] << ")=" << c;
-    } );*/
-  ioTest<Hdf5Writer, Hdf5Reader, PerambIOTestClass>("iotest_object.h5", o, "PerambIOTestClass_object_instance_name");
-  //DumpMemoryOrder(o.Perambulator);
-
-  // Tensor of spin colour
-  LSCTensor l;
-  Val = 0;
-  for( int i = 0 ; i < l.dimension(0) ; i++)
-    for( int j = 0 ; j < l.dimension(1) ; j++)
-      for( int k = 0 ; k < l.dimension(2) ; k++)
-        for( int s = 0 ; s < Ns ; s++ )
-          for( int c = 0 ; c < Nc ; c++ )
-          {
-            l(i,j,k)()(s)(c) = Val;
-            Val += Inc;
-          }
-  ioTest<Hdf5Writer, Hdf5Reader, LSCTensor>("iotest_LSCTensor.h5", l, "LSCTensor_object_instance_name");
-  //dump_tensor(l, "l");
-
-  // Tensor of spin colour
-  LCMTensor l2;
-  Val = 0;
-  for( int i = 0 ; i < l2.dimension(0) ; i++)
-    for( int j = 0 ; j < l2.dimension(1) ; j++)
-      for( int k = 0 ; k < l2.dimension(2) ; k++)
-        for( int l = 0 ; l < Ns ; l++ )
-          for( int c = 0 ; c < Nc ; c++ )
-            for( int c2 = 0 ; c2 < Nc ; c2++ )
-            {
-              l2(i,j,k)(l)()(c,c2) = Val;
-              Val += Inc;
-            }
-  ioTest<Hdf5Writer, Hdf5Reader, LCMTensor>("iotest_LCMTensor.h5", l2, "LCMTensor_object_instance_name");
-  //dump_tensor(l2, "l2");
+#endif
 }
 #endif
 

From c14547ddbecb1bfacf17841e719de167ffcdc2ce Mon Sep 17 00:00:00 2001
From: Michael Marshall <michael.marshall@ed.ac.uk>
Date: Tue, 19 Feb 2019 16:12:55 +0000
Subject: [PATCH 2/2] EigenIO writing rationalised. All indices (trivial or
 not) written

---
 Grid/serialisation/BaseIO.h    | 193 +++++++++++----------------------
 tests/IO/Test_serialisation.cc |  41 +++++--
 2 files changed, 92 insertions(+), 142 deletions(-)

diff --git a/Grid/serialisation/BaseIO.h b/Grid/serialisation/BaseIO.h
index 771cc628..92c02476 100644
--- a/Grid/serialisation/BaseIO.h
+++ b/Grid/serialisation/BaseIO.h
@@ -96,7 +96,6 @@ namespace Grid {
     template <typename T> struct Traits<T, typename std::enable_if<is_scalar<T>::value, void>::type> {
       using scalar_type = T; // Type of the underlying scalar
       using scalar_real = typename RealType<scalar_type>::type; // real type underlying scalar_type
-      static constexpr unsigned int depth = 0;  // How many levels of Grid Tensor there are (TensorLevel)
       static constexpr unsigned int rank = 0;   // The rank of the grid tensor (i.e. how many indices used)
       static constexpr unsigned int rank_non_trivial = 0; // As per rank, but excludes those of dimension 1
       static constexpr unsigned int count = 1;  // total number of elements (i.e. product of dimensions)
@@ -123,8 +122,7 @@ namespace Grid {
     template <typename T> struct Traits<iScalar<T>> {
       using scalar_type = typename Traits<T>::scalar_type;
       using scalar_real = typename RealType<scalar_type>::type;
-      static constexpr unsigned int depth = 1 + Traits<T>::depth;
-      static constexpr unsigned int rank = 0 + Traits<T>::rank;
+      static constexpr unsigned int rank = 1 + Traits<T>::rank;
       static constexpr unsigned int rank_non_trivial = 0 + Traits<T>::rank_non_trivial;
       static constexpr unsigned int count = 1 * Traits<T>::count;
       static constexpr std::size_t scalar_size = Traits<T>::scalar_size;
@@ -137,7 +135,6 @@ namespace Grid {
     template <typename T, int N> struct Traits<iVector<T, N>> {
       using scalar_type = typename Traits<T>::scalar_type;
       using scalar_real = typename RealType<scalar_type>::type;
-      static constexpr unsigned int depth = 1 + Traits<T>::depth;
       static constexpr unsigned int rank = 1 + Traits<T>::rank;
       static constexpr unsigned int rank_non_trivial = (N>1 ? 1 : 0) + Traits<T>::rank_non_trivial;
       static constexpr unsigned int count = N * Traits<T>::count;
@@ -152,7 +149,6 @@ namespace Grid {
     template <typename T, int N> struct Traits<iMatrix<T, N>> {
       using scalar_type = typename Traits<T>::scalar_type;
       using scalar_real = typename RealType<scalar_type>::type;
-      static constexpr unsigned int depth = 1 + Traits<T>::depth;
       static constexpr unsigned int rank = 2 + Traits<T>::rank;
       static constexpr unsigned int rank_non_trivial = (N>1 ? 2 : 0) + Traits<T>::rank_non_trivial;
       static constexpr unsigned int count = N * N * Traits<T>::count;
@@ -377,10 +373,7 @@ namespace Grid {
     template <typename U, int N>
     void write(const std::string &s, const iMatrix<U, N> &output);
     template <typename ETensor>
-    typename std::enable_if<EigenIO::is_tensor_of_scalar<ETensor>::value, void>::type
-    write(const std::string &s, const ETensor &output);
-    template <typename ETensor>
-    typename std::enable_if<EigenIO::is_tensor_of_container<ETensor>::value, void>::type
+    typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
     write(const std::string &s, const ETensor &output);
 
     void         scientificFormat(const bool set);
@@ -502,128 +495,64 @@ namespace Grid {
   {
     upcast->writeDefault(s, tensorToVec(output));
   }
-
-  // Eigen::Tensors of arithmetic/complex base type
-  template <typename T>
-  template <typename ETensor>
-  typename std::enable_if<EigenIO::is_tensor_of_scalar<ETensor>::value, void>::type
-  Writer<T>::write(const std::string &s, const ETensor &output)
-  {
-    const typename ETensor::Index NumElements{output.size()};
-    assert( NumElements > 0 );
-    if( NumElements == 1 )
-      upcast->writeDefault(s, * output.data());
-    else {
-      // We're only interested in non-trivial dimensions (i.e. dimensions > 1)
-      unsigned int TrivialDimCount{0};
-      std::vector<size_t> NonTrivialDims;
-      NonTrivialDims.reserve(output.NumDimensions); // Make sure we only do one malloc
-      for(auto i = 0; i < output.NumDimensions; i++ ) {
-        auto dim = output.dimension(i);
-        if( dim <= 1 ) {
-          TrivialDimCount++;
-          assert( dim == 1 ); // Not expecting dimension to be <= 0
-        } else {
-          size_t s = static_cast<size_t>(dim);
-          assert( s == dim ); // check we didn't lose anything in the conversion
-          NonTrivialDims.push_back(s);
-        }
-      }
-      // NB: NumElements > 1 implies this is not a scalar, so some dims should be left
-      assert( output.NumDimensions > TrivialDimCount );
-      // If the Tensor isn't in Row-Major order, then we'll need to copy it's data
-      const bool CopyData{ETensor::Layout != Eigen::StorageOptions::RowMajor};
-      using Scalar = typename ETensor::Scalar;
-      const Scalar * pWriteBuffer;
-      Scalar * pCopyBuffer = nullptr;
-      if( !CopyData )
-        pWriteBuffer = output.data();
-      else {
-        // Regardless of the Eigen::Tensor storage order, the copy will be Row Major
-        pCopyBuffer = static_cast<Scalar *>(malloc(sizeof(Scalar) * NumElements));
-        pWriteBuffer = pCopyBuffer;
-        std::array<typename ETensor::Index, ETensor::NumIndices> MyIndex;
-        for( auto &idx : MyIndex ) idx = 0;
-        for( typename ETensor::Index n = 0; n < NumElements; n++ ) {
-          pCopyBuffer[n] = output( MyIndex );
-          // Now increment the index
-          for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- )
-            MyIndex[i] = 0;
-        }
-      }
-      upcast->template writeMultiDim<typename ETensor::Scalar>(s, NonTrivialDims, pWriteBuffer, NumElements);
-      if( pCopyBuffer ) free( pCopyBuffer );
-    }
-  }
   
   // Eigen::Tensors of Grid tensors (iScalar, iVector, iMatrix)
   template <typename T>
   template <typename ETensor/*, typename U, int N*/>
-  typename std::enable_if<EigenIO::is_tensor_of_container<ETensor>::value, void>::type
+  typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
   Writer<T>::write(const std::string &s, const ETensor &output)
   {
-    const typename ETensor::Index NumElements{output.size()};
+    using Index = typename ETensor::Index;
+    using Container = typename ETensor::Scalar; // NB: could be same as Scalar
+    using Traits = EigenIO::Traits<Container>;
+    using Scalar = typename Traits::scalar_type; // type of the underlying scalar
+    constexpr unsigned int TensorRank{ETensor::NumIndices};
+    constexpr unsigned int ContainerRank{Traits::rank}; // Only non-zero for containers
+    constexpr unsigned int TotalRank{TensorRank + ContainerRank};
+    const Index NumElements{output.size()};
     assert( NumElements > 0 );
-    if( NumElements == 1 )
-      upcast->writeDefault(s, tensorToVec(* output.data()));
-    else {
-      // We're only interested in non-trivial dimensions (i.e. dimensions > 1)
-      unsigned int TrivialDimCount{0};
-      std::vector<size_t> NonTrivialDims;
-      NonTrivialDims.reserve(output.NumDimensions + EigenIO::Traits<typename ETensor::Scalar>::rank_non_trivial); // Make sure we only do one malloc
-      for(auto i = 0; i < output.NumDimensions; i++ ) {
-        auto dim = output.dimension(i);
-        if( dim <= 1 ) {
-          TrivialDimCount++;
-          assert( dim == 1 ); // Not expecting dimension to be <= 0
-        } else {
-          size_t s = static_cast<size_t>(dim);
-          assert( s == dim ); // check we didn't lose anything in the conversion
-          NonTrivialDims.push_back(s);
-        }
-      }
-      // NB: NumElements > 1 implies this is not a scalar, so some dims should be left
-      assert( output.NumDimensions > TrivialDimCount );
-      // Now add the extra dimensions, based on object zero
-      typename TensorToVec<typename ETensor::Scalar>::type ttv = tensorToVec(* output.data());
-      Flatten<typename TensorToVec<typename ETensor::Scalar>::type> f(ttv);
-      const std::vector<size_t> & ExtraDims{f.getDim()};
-      assert(ExtraDims.size() == EigenIO::Traits<typename ETensor::Scalar>::rank_non_trivial);
-      size_t ExtraCount{1};
-      for( auto i : ExtraDims ) {
-        assert( i > 0 );
-        ExtraCount *= i;
-        NonTrivialDims.push_back(i);
-      }
-      assert(EigenIO::Traits<typename ETensor::Scalar>::count == ExtraCount);
-      assert(EigenIO::Traits<typename ETensor::Scalar>::size  == sizeof( typename ETensor::Scalar ));
-      // If the Tensor isn't in Row-Major order, then we'll need to copy it's data
-      const bool CopyData{ETensor::Layout != Eigen::StorageOptions::RowMajor};
-      using Scalar = typename ETensor::Scalar::scalar_type;
-      const Scalar * pWriteBuffer;
-      Scalar * pCopyBuffer = nullptr;
-      const typename ETensor::Index TotalNumElements = NumElements * ExtraCount;
-      if( !CopyData )
-        pWriteBuffer = output.data()->begin();
-      else {
-        // Regardless of the Eigen::Tensor storage order, the copy will be Row Major
-        pCopyBuffer = static_cast<Scalar *>(malloc(TotalNumElements * sizeof(Scalar)));
-        pWriteBuffer = pCopyBuffer;
-        Scalar * pCopy = pCopyBuffer;
-        std::array<typename ETensor::Index, ETensor::NumIndices> MyIndex;
-        for( auto &idx : MyIndex ) idx = 0;
-        for( typename ETensor::Index n = 0; n < NumElements; n++ ) {
-          // Copy the grid tensor
-          for( const Scalar &Source : output( MyIndex ) )
-            * pCopy ++ =  Source;
-          // Now increment the index
-          for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- )
-            MyIndex[i] = 0;
-        }
-      }
-      upcast->template writeMultiDim<Scalar>(s, NonTrivialDims, pWriteBuffer, TotalNumElements);
-      if( pCopyBuffer ) free( pCopyBuffer );
+
+    // Get the dimensionality of the tensor
+    std::vector<std::size_t>  TotalDims(TotalRank);
+    for(auto i = 0; i < TensorRank; i++ ) {
+      auto dim = output.dimension(i);
+      TotalDims[i] = static_cast<size_t>(dim);
+      assert( TotalDims[i] == dim ); // check we didn't lose anything in the conversion
     }
+    for(auto i = 0; i < ContainerRank; i++ )
+      TotalDims[TensorRank + i] = Traits::Dimension(i);
+
+    // If the Tensor isn't in Row-Major order, then we'll need to copy it's data
+    const bool CopyData{NumElements > 1 && ETensor::Layout != Eigen::StorageOptions::RowMajor};
+    const Scalar * pWriteBuffer;
+    Scalar * pCopyBuffer = nullptr;
+    const Index TotalNumElements = NumElements * Traits::count;
+    if( !CopyData ) {
+      if constexpr ( ContainerRank == 0 )
+        pWriteBuffer = output.data();
+      else
+        pWriteBuffer = output.data()->begin();
+    } else {
+      // Regardless of the Eigen::Tensor storage order, the copy will be Row Major
+      pCopyBuffer = static_cast<Scalar *>(malloc(TotalNumElements * sizeof(Scalar)));
+      pWriteBuffer = pCopyBuffer;
+      Scalar * pCopy = pCopyBuffer;
+      std::array<Index, TensorRank> MyIndex;
+      for( auto &idx : MyIndex ) idx = 0;
+      for( auto n = 0; n < NumElements; n++ ) {
+        if constexpr ( ContainerRank == 0 )
+          * pCopy ++ = output( MyIndex );
+        else {
+          for( const Scalar &Source : output( MyIndex ) )
+            * pCopy ++ = Source;
+        }
+        // Now increment the index
+        for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- )
+          MyIndex[i] = 0;
+      }
+    }
+    upcast->template writeMultiDim<Scalar>(s, TotalDims, pWriteBuffer, TotalNumElements);
+    if( pCopyBuffer ) free( pCopyBuffer );
   }
 
   template <typename T>
@@ -875,21 +804,21 @@ namespace Grid {
       return os;
     }
 
-    template <typename T>
-    static inline typename std::enable_if<!std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value, bool>::type
-    CompareMember(const T &lhs, const T &rhs) {
+    template <typename T1, typename T2>
+    static inline typename std::enable_if<!EigenIO::is_tensor<T1>::value || !EigenIO::is_tensor<T2>::value, bool>::type
+    CompareMember(const T1 &lhs, const T2 &rhs) {
       return lhs == rhs;
     }
 
-    template <typename T>
-    static inline typename std::enable_if<std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value, bool>::type
-    CompareMember(const T &lhs, const T &rhs) {
+    template <typename T1, typename T2>
+    static inline typename std::enable_if<EigenIO::is_tensor<T1>::value && EigenIO::is_tensor<T2>::value, bool>::type
+    CompareMember(const T1 &lhs, const T2 &rhs) {
       // First check whether dimensions match (Eigen tensor library will assert if they don't match)
-      bool bReturnValue = true;
-      for( auto i = 0 ; bReturnValue && i < T::NumIndices ; i++ )
+      bool bReturnValue = (T1::NumIndices == T2::NumIndices);
+      for( auto i = 0 ; bReturnValue && i < T1::NumIndices ; i++ )
           bReturnValue = ( lhs.dimension(i)) == rhs.dimension(i);
       if( bReturnValue ) {
-        Eigen::Tensor<bool, 0, T::Options> bResult = (lhs == rhs).all();
+        Eigen::Tensor<bool, 0, T1::Options> bResult = (lhs == rhs).all();
         bReturnValue = bResult(0);
       }
       return bReturnValue;
diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc
index 342fc214..cb4f65d5 100644
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -80,21 +80,21 @@ double   d   = 2*M_PI;
 bool     b   = false;
 
 template <typename W, typename R, typename O>
-void ioTest(const std::string &filename, const O &object, const std::string &name)
+void ioTest(const std::string &filename, const O &object, const std::string &name, const char * tag = "testobject" )
 {
   std::cout << "IO test: " << name << " -> " << filename << " ...";
   // writer needs to be destroyed so that writing physically happens
   {
     W writer(filename);
 
-    write(writer, "testobject", object);
+    write(writer, tag , object);
   }
 
   std::cout << " done. reading...";
   R    reader(filename);
   std::unique_ptr<O> buf( new O ); // In case object too big for stack
 
-  read(reader, "testobject", *buf);
+  read(reader, tag, *buf);
   bool good = Serializable::CompareMember(object, *buf);
   if (!good) {
     std::cout << " failure!" << std::endl;
@@ -107,10 +107,8 @@ void ioTest(const std::string &filename, const O &object, const std::string &nam
 
 #ifdef HAVE_HDF5
 typedef std::complex<double> TestScalar;
-using TensorSingle = Eigen::TensorFixedSize<int, Eigen::Sizes<1>>;
-using TensorSimple = Eigen::Tensor<iMatrix<TestScalar,1>, 6>;
-typedef Eigen::Tensor<unsigned short, 5> TensorRank5UShort;
-typedef Eigen::Tensor<int, 5, Eigen::StorageOptions::RowMajor> TensorRank5IntAlt;
+typedef Eigen::TensorFixedSize<unsigned short, Eigen::Sizes<5,4,3,2,1>> TensorRank5UShort;
+typedef Eigen::TensorFixedSize<unsigned short, Eigen::Sizes<5,4,3,2>, Eigen::StorageOptions::RowMajor> TensorRank5UShortAlt;
 typedef Eigen::Tensor<TestScalar, 3, Eigen::StorageOptions::RowMajor> TensorRank3;
 typedef Eigen::TensorFixedSize<TestScalar, Eigen::Sizes<9,4,2>, Eigen::StorageOptions::RowMajor> Tensor_9_4_2;
 typedef std::vector<Tensor_9_4_2> aTensor_9_4_2;
@@ -143,8 +141,8 @@ public:
   , DistilParameterValues{2,3,1,4,5,1}
   , Perambulator(2,3,1,4,5,1)
   , Perambulator2(7,1,6,1,5,1)
-  , tensorRank5UShort{5,4,3,2,1}
   , tensorRank3(7,3,2)
+  , atensor_9_4_2(3)
   {
     Grid_complex<double> Flag{1,-3.1415927};
     SequentialInit(Perambulator,  Flag);
@@ -162,7 +160,8 @@ public:
 };
 
 #define TensorWriteReadInnerNoInit( T ) \
-  ioTest<Hdf5Writer, Hdf5Reader, T>("iotest_"s + std::to_string(++TestNum) + "_" #T ".h5", t, #T);
+  filename = "iotest_"s + std::to_string(++TestNum) + "_" #T ".h5"; \
+  ioTest<Hdf5Writer, Hdf5Reader, T>(filename, t, #T, #T);
 #define TensorWriteReadInner( T )  SequentialInit( t ); TensorWriteReadInnerNoInit( T )
 #define TensorWriteRead( T      ) { T t               ; TensorWriteReadInner( T ) }
 #define TensorWriteReadV(T, ... ) { T t( __VA_ARGS__ ); TensorWriteReadInner( T ) }
@@ -170,12 +169,34 @@ public:
 
 void EigenHdf5IOTest(void)
 {
-  unsigned int TestNum = 0;
   using namespace std::string_literals;
+  unsigned int TestNum = 0;
+  std::string filename;
+  using TensorSingle = Eigen::TensorFixedSize<int, Eigen::Sizes<1>>;
   TensorWriteRead( TensorSingle )
+  using TensorSimple = Eigen::Tensor<iMatrix<TestScalar,1>, 6>;
   TensorWriteReadV( TensorSimple, 1, 1, 1, 1, 1, 1 )
   TensorWriteReadV( TensorRank3, 6, 3, 2 )
   TensorWriteRead ( Tensor_9_4_2 )
+  {
+    TensorRank5UShort t;
+    TensorWriteReadInner ( TensorRank5UShort );
+    std::cout << "    Testing alternate memory order read ... ";
+    TensorRank5UShortAlt t2;
+    Hdf5Reader reader(filename);
+    read(reader, "TensorRank5UShort", t2);
+    bool good = true;
+    for_all( t2, [&](unsigned short c, unsigned short n,
+                     const std::array<size_t, TensorRank5UShortAlt::NumIndices> &Dims ) {
+      good = good && ( c == n );
+    } );
+    if (!good) {
+      std::cout << " failure!" << std::endl;
+      dump_tensor(t2,"t2");
+      exit(EXIT_FAILURE);
+    }
+    std::cout << " done." << std::endl;
+  }
   TensorWriteRead ( LSCTensor )
   TensorWriteReadLarge( PerambIOTestClass )
 #ifdef DEBUG