diff --git a/.gitignore b/.gitignore
index 45a3ea53..5338acb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,3 +114,4 @@ gh-pages/
 #####################
 Grid/qcd/spin/gamma-gen/*.h
 Grid/qcd/spin/gamma-gen/*.cc
+Grid/util/Version.h
diff --git a/Grid/Grid.h b/Grid/Grid.h
index 9dcc207b..70859c81 100644
--- a/Grid/Grid.h
+++ b/Grid/Grid.h
@@ -42,6 +42,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/GridQCDcore.h>
 #include <Grid/qcd/action/Action.h>
 #include <Grid/qcd/utils/GaugeFix.h>
+#include <Grid/qcd/utils/CovariantSmearing.h>
 #include <Grid/qcd/smearing/Smearing.h>
 #include <Grid/parallelIO/MetaData.h>
 #include <Grid/qcd/hmc/HMC_aggregate.h>
diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h
index 1895dc3e..144ff29f 100644
--- a/Grid/parallelIO/BinaryIO.h
+++ b/Grid/parallelIO/BinaryIO.h
@@ -619,6 +619,7 @@ PARALLEL_CRITICAL
         {
           std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " attempt(s) remaining)" << std::endl;
           offset = offsetCopy;
+          parallel_for(uint64_t x=0;x<lsites;x++) munge(scalardata[x],iodata[x]);
         }
         else
         {
diff --git a/Grid/parallelIO/IldgIO.h b/Grid/parallelIO/IldgIO.h
index 15017be3..db07cac0 100644
--- a/Grid/parallelIO/IldgIO.h
+++ b/Grid/parallelIO/IldgIO.h
@@ -47,8 +47,10 @@ namespace Grid {
 namespace QCD {
 
 #define GRID_FIELD_NORM "FieldNormMetaData"
-#define GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck) \
-assert(0.5*fabs(FieldNormMetaData_.norm2 - n2ck)/(FieldNormMetaData_.norm2 + n2ck) < 1.0e-5 );
+#define GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) \
+0.5*fabs(FieldNormMetaData_.norm2 - n2ck)/(FieldNormMetaData_.norm2 + n2ck)
+#define GRID_FIELD_NORM_CHECK(FieldNormMetaData_, n2ck) \
+assert(GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) < 1.0e-5);
 
   /////////////////////////////////
   // Encode word types as strings
@@ -249,9 +251,9 @@ class GridLimeReader : public BinaryIO {
 	/////////////////////////////////////////////
 	if(FieldNormMetaData_.norm2 != 0.0){ 
 	  RealD n2ck = norm2(field);
-	  std::cout << GridLogMessage << "Field norm: metadata= "<<FieldNormMetaData_.norm2<< " / field= " << n2ck<<std::endl;
+	  std::cout << GridLogMessage << "Field norm: metadata= " << FieldNormMetaData_.norm2 
+              << " / field= " << n2ck << " / rdiff= " << GRID_FIELD_NORM_CALC(FieldNormMetaData_,n2ck) << std::endl;
 	  GRID_FIELD_NORM_CHECK(FieldNormMetaData_,n2ck);
-	  std::cout << GridLogMessage <<  "FieldNormMetaData OK! "<<std::endl;
 	}
 	assert(scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb)==1);
 
diff --git a/Grid/qcd/action/fermion/DomainWallFermion.h b/Grid/qcd/action/fermion/DomainWallFermion.h
index 56179f26..4de9ccf0 100644
--- a/Grid/qcd/action/fermion/DomainWallFermion.h
+++ b/Grid/qcd/action/fermion/DomainWallFermion.h
@@ -53,7 +53,7 @@ namespace Grid {
 	ComplexField coor(in._grid);
 	ComplexField ph(in._grid);  ph = zero;
 	FermionField in_buf(in._grid); in_buf = zero;
-	Complex ci(0.0,1.0);
+	Scalar ci(0.0,1.0);
 	assert(twist.size() == Nd);//check that twist is Nd
 	int shift = 0;
 	if(fiveD) shift = 1;
@@ -63,7 +63,7 @@ namespace Grid {
           LatticeCoordinate(coor, nu + shift);
 	  ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu+shift])));
 	}
-	in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in;
+	in_buf = exp(Scalar(2.0*M_PI)*ci*ph*(-1.0))*in;
 
 	if(fiveD){//FFT only on temporal and spatial dimensions
           std::vector<int> mask(Nd+1,1); mask[0] = 0;
@@ -78,7 +78,7 @@ namespace Grid {
         }
 
 	//phase for boundary condition
-	out = out * exp((Real)(2.0*M_PI)*ci*ph);
+	out = out * exp(Scalar(2.0*M_PI)*ci*ph);
       };
 
       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h
index 55942dea..436808e6 100644
--- a/Grid/qcd/action/fermion/FermionOperator.h
+++ b/Grid/qcd/action/fermion/FermionOperator.h
@@ -106,21 +106,21 @@ namespace Grid {
 	ComplexField coor(in._grid);
 	ComplexField ph(in._grid);  ph = zero;
 	FermionField in_buf(in._grid); in_buf = zero;
-	Complex ci(0.0,1.0);
+	Scalar ci(0.0,1.0);
 	assert(twist.size() == Nd);//check that twist is Nd
 	for(unsigned int nu = 0; nu < Nd; nu++)
 	{
           LatticeCoordinate(coor, nu);
 	  ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu])));
 	}
-	in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in;
+	in_buf = exp(Scalar(-2.0*M_PI)*ci*ph)*in;
 
 	theFFT.FFT_all_dim(in_k,in_buf,FFT::forward);
         this->MomentumSpacePropagator(prop_k,in_k,mass,twist);
 	theFFT.FFT_all_dim(out,prop_k,FFT::backward);
 
 	//phase for boundary condition
-	out = out * exp((Real)(2.0*M_PI)*ci*ph);
+	out = out * exp(Scalar(2.0*M_PI)*ci*ph);
 
       };
       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
diff --git a/Grid/qcd/action/gauge/GaugeImplTypes.h b/Grid/qcd/action/gauge/GaugeImplTypes.h
index 95680900..52e1e100 100644
--- a/Grid/qcd/action/gauge/GaugeImplTypes.h
+++ b/Grid/qcd/action/gauge/GaugeImplTypes.h
@@ -46,6 +46,7 @@ namespace QCD {
 
 #define INHERIT_GIMPL_TYPES(GImpl)                  \
   typedef typename GImpl::Simd Simd;                \
+  typedef typename GImpl::Scalar Scalar;	    \
   typedef typename GImpl::LinkField GaugeLinkField; \
   typedef typename GImpl::Field GaugeField;         \
   typedef typename GImpl::ComplexField ComplexField;\
@@ -63,7 +64,8 @@ namespace QCD {
 template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
 public:
   typedef S Simd;
-
+  typedef typename Simd::scalar_type scalar_type;
+  typedef scalar_type Scalar;
   template <typename vtype> using iImplScalar     = iScalar<iScalar<iScalar<vtype> > >;
   template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
   template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
diff --git a/Grid/qcd/action/gauge/Photon.h b/Grid/qcd/action/gauge/Photon.h
index f059fcf3..9afafe6c 100644
--- a/Grid/qcd/action/gauge/Photon.h
+++ b/Grid/qcd/action/gauge/Photon.h
@@ -38,6 +38,7 @@ namespace QCD{
   {
   public:
     typedef S Simd;
+    typedef typename Simd::scalar_type Scalar;
     
     template <typename vtype>
     using iImplGaugeLink  = iScalar<iScalar<iScalar<vtype>>>;
diff --git a/Grid/qcd/utils/CovariantSmearing.h b/Grid/qcd/utils/CovariantSmearing.h
new file mode 100644
index 00000000..eae5ed71
--- /dev/null
+++ b/Grid/qcd/utils/CovariantSmearing.h
@@ -0,0 +1,87 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/qcd/action/scalar/CovariantLaplacian.h
+
+Copyright (C) 2016
+
+Author: Azusa Yamaguchi
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+#pragma once
+
+namespace Grid {
+namespace QCD {
+
+template <class Gimpl> class CovariantSmearing : public Gimpl 
+{
+public:
+  INHERIT_GIMPL_TYPES(Gimpl);
+
+  typedef typename Gimpl::GaugeLinkField GaugeMat;
+  typedef typename Gimpl::GaugeField GaugeLorentz;
+
+  template<typename T>
+  static void GaussianSmear(const std::vector<LatticeColourMatrix>& U, 
+			    T& chi, 
+			    const Real& width, int Iterations, int orthog)
+  {
+    GridBase *grid = chi._grid;
+    T psi(grid);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Follow Chroma conventions for width to keep compatibility with previous data
+    // Free field iterates 
+    //   chi = (1 - w^2/4N p^2)^N chi
+    //
+    //       ~ (e^(-w^2/4N p^2)^N chi
+    //       ~ (e^(-w^2/4 p^2) chi
+    //       ~ (e^(-w'^2/2 p^2) chi          [ w' = w/sqrt(2) ]
+    //
+    // Which in coordinate space is proportional to
+    //
+    //   e^(-x^2/w^2) = e^(-x^2/2w'^2) 
+    //
+    // The 4 is a bit unconventional from Gaussian width perspective, but... it's Chroma convention.
+    // 2nd derivative approx d^2/dx^2  =  x+mu + x-mu - 2x
+    //
+    // d^2/dx^2 = - p^2
+    //
+    // chi = ( 1 + w^2/4N d^2/dx^2 )^N chi
+    //
+    ////////////////////////////////////////////////////////////////////////////////////
+    Real coeff = (width*width) / Real(4*Iterations);
+  
+    int dims = Nd;
+    if( orthog < Nd ) dims=Nd-1;
+
+    for(int n = 0; n < Iterations; ++n) {
+      psi = (-2.0*dims)*chi;
+      for(int mu=0;mu<Nd;mu++) {
+	if ( mu != orthog ) { 
+	  psi = psi + Gimpl::CovShiftForward(U[mu],mu,chi);    
+	  psi = psi + Gimpl::CovShiftBackward(U[mu],mu,chi);    
+	}
+      }
+      chi = chi + coeff*psi;
+    }
+  }
+};
+}}
diff --git a/Grid/serialisation/BaseIO.h b/Grid/serialisation/BaseIO.h
index dd15e7da..e9d5bedf 100644
--- a/Grid/serialisation/BaseIO.h
+++ b/Grid/serialisation/BaseIO.h
@@ -33,12 +33,76 @@ Author: Guido Cossu <guido.cossu@ed.ac.uk>
 #include <type_traits>
 #include <Grid/tensors/Tensors.h>
 #include <Grid/serialisation/VectorUtils.h>
+#include <Grid/Eigen/unsupported/CXX11/Tensor>
 
 namespace Grid {
+  namespace EigenIO {
+    // EigenIO works for scalars that are not just Grid supported scalars
+    template<typename T, typename V = void> struct is_complex : public std::false_type {};
+    // Support all complex types (not just Grid complex types) - even if the definitions overlap (!)
+    template<typename T> struct is_complex<             T , typename
+        std::enable_if< ::Grid::is_complex<             T >::value>::type> : public std::true_type {};
+    template<typename T> struct is_complex<std::complex<T>, typename
+        std::enable_if<!::Grid::is_complex<std::complex<T>>::value>::type> : public std::true_type {};
+
+    // Helpers to support I/O for Eigen tensors of arithmetic scalars, complex types, or Grid tensors
+    template<typename T, typename V = void> struct is_scalar : public std::false_type {};
+    template<typename T> struct is_scalar<T, typename std::enable_if<std::is_arithmetic<T>::value || is_complex<T>::value>::type> : public std::true_type {};
+
+    // Is this an Eigen tensor
+    template<typename T> struct is_tensor : std::integral_constant<bool,
+      std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {};
+
+    // Is this an Eigen tensor of a supported scalar
+    template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {};
+    template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {};
+
+    // Is this an Eigen tensor of a supported container
+    template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {};
+    template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {};
+
+    // These traits describe the scalars inside Eigen tensors
+    // I wish I could define these in reference to the scalar type (so there would be fewer traits defined)
+    // but I'm unable to find a syntax to make this work
+    template<typename T, typename V = void> struct Traits {};
+    // Traits are the default for scalars, or come from GridTypeMapper for GridTensors
+    template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_scalar<T>::value>::type>
+      : public GridTypeMapper_Base {
+      using scalar_type   = typename T::Scalar; // ultimate base scalar
+      static constexpr bool is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value;
+    };
+    // Traits are the default for scalars, or come from GridTypeMapper for GridTensors
+    template<typename T> struct Traits<T, typename std::enable_if<is_tensor_of_container<T>::value>::type> {
+      using BaseTraits  = GridTypeMapper<typename T::Scalar>;
+      using scalar_type = typename BaseTraits::scalar_type; // ultimate base scalar
+      static constexpr bool   is_complex = ::Grid::EigenIO::is_complex<scalar_type>::value;
+      static constexpr int   TensorLevel = BaseTraits::TensorLevel;
+      static constexpr int          Rank = BaseTraits::Rank;
+      static constexpr std::size_t count = BaseTraits::count;
+      static constexpr int Dimension(int dim) { return BaseTraits::Dimension(dim); }
+    };
+
+    // Is this a fixed-size Eigen tensor
+    template<typename T> struct is_tensor_fixed : public std::false_type {};
+    template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
+    struct is_tensor_fixed<Eigen::TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType>>
+        : public std::true_type {};
+    template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType,
+              int MapOptions_, template <class> class MapPointer_>
+    struct is_tensor_fixed<Eigen::TensorMap<Eigen::TensorFixedSize<Scalar_, Dimensions_,
+                                            Options_, IndexType>, MapOptions_, MapPointer_>>
+        : public std::true_type {};
+
+    // Is this a variable-size Eigen tensor
+    template<typename T, typename V = void> struct is_tensor_variable : public std::false_type {};
+    template<typename T> struct is_tensor_variable<T, typename std::enable_if<is_tensor<T>::value
+        && !is_tensor_fixed<T>::value>::type> : public std::true_type {};
+  }
+
   // Abstract writer/reader classes ////////////////////////////////////////////
   // static polymorphism implemented using CRTP idiom
   class Serializable;
-  
+
   // Static abstract writer
   template <typename T>
   class Writer
@@ -49,10 +113,10 @@ namespace Grid {
     void push(const std::string &s);
     void pop(void);
     template <typename U>
-    typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
+    typename std::enable_if<std::is_base_of<Serializable, U>::value>::type
     write(const std::string& s, const U &output);
     template <typename U>
-    typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
+    typename std::enable_if<!std::is_base_of<Serializable, U>::value && !EigenIO::is_tensor<U>::value>::type
     write(const std::string& s, const U &output);
     template <typename U>
     void write(const std::string &s, const iScalar<U> &output);
@@ -60,6 +124,42 @@ namespace Grid {
     void write(const std::string &s, const iVector<U, N> &output);
     template <typename U, int N>
     void write(const std::string &s, const iMatrix<U, N> &output);
+    template <typename ETensor>
+    typename std::enable_if<EigenIO::is_tensor<ETensor>::value>::type
+    write(const std::string &s, const ETensor &output);
+
+    // Helper functions for Scalar vs Container specialisations
+    template <typename ETensor>
+    inline typename std::enable_if<EigenIO::is_tensor_of_scalar<ETensor>::value,
+    const typename ETensor::Scalar *>::type
+    getFirstScalar(const ETensor &output)
+    {
+      return output.data();
+    }
+    
+    template <typename ETensor>
+    inline typename std::enable_if<EigenIO::is_tensor_of_container<ETensor>::value,
+    const typename EigenIO::Traits<ETensor>::scalar_type *>::type
+    getFirstScalar(const ETensor &output)
+    {
+      return output.data()->begin();
+    }
+    
+    template <typename S>
+    inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type
+    copyScalars(S * &pCopy, const S &Source)
+    {
+      * pCopy ++ = Source;
+    }
+    
+    template <typename S>
+    inline typename std::enable_if<isGridTensor<S>::value, void>::type
+    copyScalars(typename GridTypeMapper<S>::scalar_type * &pCopy, const S &Source)
+    {
+      for( const typename GridTypeMapper<S>::scalar_type &item : Source )
+        * pCopy ++ = item;
+    }
+
     void         scientificFormat(const bool set);
     bool         isScientific(void);
     void         setPrecision(const unsigned int prec);
@@ -83,7 +183,8 @@ namespace Grid {
     typename std::enable_if<std::is_base_of<Serializable, U>::value, void>::type
     read(const std::string& s, U &output);
     template <typename U>
-    typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
+    typename std::enable_if<!std::is_base_of<Serializable, U>::value
+                         && !EigenIO::is_tensor<U>::value, void>::type
     read(const std::string& s, U &output);
     template <typename U>
     void read(const std::string &s, iScalar<U> &output);
@@ -91,6 +192,32 @@ namespace Grid {
     void read(const std::string &s, iVector<U, N> &output);
     template <typename U, int N>
     void read(const std::string &s, iMatrix<U, N> &output);
+    template <typename ETensor>
+    typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
+    read(const std::string &s, ETensor &output);
+    template <typename ETensor>
+    typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type
+    Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims );
+    template <typename ETensor>
+    typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
+    Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims );
+  
+    // Helper functions for Scalar vs Container specialisations
+    template <typename S>
+    inline typename std::enable_if<EigenIO::is_scalar<S>::value, void>::type
+    copyScalars(S &Dest, const S * &pSource)
+    {
+      Dest = * pSource ++;
+    }
+    
+    template <typename S>
+    inline typename std::enable_if<isGridTensor<S>::value, void>::type
+    copyScalars(S &Dest, const typename GridTypeMapper<S>::scalar_type * &pSource)
+    {
+      for( typename GridTypeMapper<S>::scalar_type &item : Dest )
+        item = * pSource ++;
+    }
+    
   protected:
     template <typename U>
     void fromString(U &output, const std::string &s);
@@ -135,12 +262,14 @@ namespace Grid {
   
   template <typename T>
   template <typename U>
-  typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
+  typename std::enable_if<!std::is_base_of<Serializable, U>::value
+                       && !EigenIO::is_tensor<U>::value, void>::type
   Writer<T>::write(const std::string &s, const U &output)
   {
     upcast->writeDefault(s, output);
   }
 
+
   template <typename T>
   template <typename U>
   void Writer<T>::write(const std::string &s, const iScalar<U> &output)
@@ -161,6 +290,57 @@ namespace Grid {
   {
     upcast->writeDefault(s, tensorToVec(output));
   }
+  
+  // Eigen::Tensors of Grid tensors (iScalar, iVector, iMatrix)
+  template <typename T>
+  template <typename ETensor>
+  typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
+  Writer<T>::write(const std::string &s, const ETensor &output)
+  {
+    using Index = typename ETensor::Index;
+    using Container = typename ETensor::Scalar; // NB: could be same as scalar
+    using Traits = EigenIO::Traits<ETensor>;
+    using Scalar = typename Traits::scalar_type; // type of the underlying scalar
+    constexpr unsigned int TensorRank{ETensor::NumIndices};
+    constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers
+    constexpr unsigned int TotalRank{TensorRank + ContainerRank};
+    const Index NumElements{output.size()};
+    assert( NumElements > 0 );
+
+    // Get the dimensionality of the tensor
+    std::vector<std::size_t>  TotalDims(TotalRank);
+    for(auto i = 0; i < TensorRank; i++ ) {
+      auto dim = output.dimension(i);
+      TotalDims[i] = static_cast<size_t>(dim);
+      assert( TotalDims[i] == dim ); // check we didn't lose anything in the conversion
+    }
+    for(auto i = 0; i < ContainerRank; i++ )
+      TotalDims[TensorRank + i] = Traits::Dimension(i);
+
+    // If the Tensor isn't in Row-Major order, then we'll need to copy it's data
+    const bool CopyData{NumElements > 1 && ETensor::Layout != Eigen::StorageOptions::RowMajor};
+    const Scalar * pWriteBuffer;
+    std::vector<Scalar> CopyBuffer;
+    const Index TotalNumElements = NumElements * Traits::count;
+    if( !CopyData ) {
+      pWriteBuffer = getFirstScalar( output );
+    } else {
+      // Regardless of the Eigen::Tensor storage order, the copy will be Row Major
+      CopyBuffer.resize( TotalNumElements );
+      Scalar * pCopy = &CopyBuffer[0];
+      pWriteBuffer = pCopy;
+      std::array<Index, TensorRank> MyIndex;
+      for( auto &idx : MyIndex ) idx = 0;
+      for( auto n = 0; n < NumElements; n++ ) {
+        const Container & c = output( MyIndex );
+        copyScalars( pCopy, c );
+        // Now increment the index
+        for( int i = output.NumDimensions - 1; i >= 0 && ++MyIndex[i] == output.dimension(i); i-- )
+          MyIndex[i] = 0;
+      }
+    }
+    upcast->template writeMultiDim<Scalar>(s, TotalDims, pWriteBuffer, TotalNumElements);
+  }
 
   template <typename T>
   void Writer<T>::scientificFormat(const bool set)
@@ -215,7 +395,8 @@ namespace Grid {
   
   template <typename T>
   template <typename U>
-  typename std::enable_if<!std::is_base_of<Serializable, U>::value, void>::type
+  typename std::enable_if<!std::is_base_of<Serializable, U>::value
+                       && !EigenIO::is_tensor<U>::value, void>::type
   Reader<T>::read(const std::string &s, U &output)
   {
     upcast->readDefault(s, output);
@@ -251,6 +432,79 @@ namespace Grid {
     vecToTensor(output, v);
   }
 
+  template <typename T>
+  template <typename ETensor>
+  typename std::enable_if<EigenIO::is_tensor<ETensor>::value, void>::type
+  Reader<T>::read(const std::string &s, ETensor &output)
+  {
+    using Index = typename ETensor::Index;
+    using Container = typename ETensor::Scalar; // NB: could be same as scalar
+    using Traits = EigenIO::Traits<ETensor>;
+    using Scalar = typename Traits::scalar_type; // type of the underlying scalar
+    constexpr unsigned int TensorRank{ETensor::NumIndices};
+    constexpr unsigned int ContainerRank{Traits::Rank}; // Only non-zero for containers
+    constexpr unsigned int TotalRank{TensorRank + ContainerRank};
+    using ETDims = std::array<Index, TensorRank>; // Dimensions of the tensor
+
+    // read the (flat) data and dimensionality
+    std::vector<std::size_t> dimData;
+    std::vector<Scalar> buf;
+    upcast->readMultiDim( s, buf, dimData );
+    assert(dimData.size() == TotalRank && "EigenIO: Tensor rank mismatch" );
+    // Make sure that the number of elements read matches dimensions read
+    std::size_t NumContainers = 1;
+    for( auto i = 0 ; i < TensorRank ; i++ )
+      NumContainers *= dimData[i];
+    // If our scalar object is a Container, make sure it's dimensions match what we read back
+    std::size_t ElementsPerContainer = 1;
+    for( auto i = 0 ; i < ContainerRank ; i++ ) {
+      assert( dimData[TensorRank+i] == Traits::Dimension(i) && "Tensor Container dimensions don't match data" );
+      ElementsPerContainer *= dimData[TensorRank+i];
+    }
+    assert( NumContainers * ElementsPerContainer == buf.size() && "EigenIO: Number of elements != product of dimensions" );
+    // Now see whether the tensor is the right shape, or can be made to be
+    const auto & dims = output.dimensions();
+    bool bShapeOK = (output.data() != nullptr);
+    for( auto i = 0; bShapeOK && i < TensorRank ; i++ )
+      if( dims[i] != dimData[i] )
+        bShapeOK = false;
+    // Make the tensor the same size as the data read
+    ETDims MyIndex;
+    if( !bShapeOK ) {
+      for( auto i = 0 ; i < TensorRank ; i++ )
+        MyIndex[i] = dimData[i];
+      Reshape(output, MyIndex);
+    }
+    // Copy the data into the tensor
+    for( auto &d : MyIndex ) d = 0;
+    const Scalar * pSource = &buf[0];
+    for( std::size_t n = 0 ; n < NumContainers ; n++ ) {
+      Container & c = output( MyIndex );
+      copyScalars( c, pSource );
+      // Now increment the index
+      for( int i = TensorRank - 1; i != -1 && ++MyIndex[i] == dims[i]; i-- )
+        MyIndex[i] = 0;
+    }
+    assert( pSource == &buf[NumContainers * ElementsPerContainer] );
+  }
+
+  template <typename T>
+  template <typename ETensor>
+  typename std::enable_if<EigenIO::is_tensor_fixed<ETensor>::value, void>::type
+  Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
+  {
+    assert( 0 && "EigenIO: Fixed tensor dimensions can't be changed" );
+  }
+
+  template <typename T>
+  template <typename ETensor>
+  typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
+  Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
+  {
+    //t.reshape( dims );
+    t.resize( dims );
+  }
+
   template <typename T>
   template <typename U>
   void Reader<T>::fromString(U &output, const std::string &s)
@@ -289,8 +543,70 @@ namespace Grid {
     {
       return os;
     }
+
+    template <typename T1, typename T2>
+    static inline typename std::enable_if<!EigenIO::is_tensor<T1>::value || !EigenIO::is_tensor<T2>::value, bool>::type
+    CompareMember(const T1 &lhs, const T2 &rhs) {
+      return lhs == rhs;
+    }
+
+    template <typename T1, typename T2>
+    static inline typename std::enable_if<EigenIO::is_tensor<T1>::value && EigenIO::is_tensor<T2>::value, bool>::type
+    CompareMember(const T1 &lhs, const T2 &rhs) {
+      // First check whether dimensions match (Eigen tensor library will assert if they don't match)
+      bool bReturnValue = (T1::NumIndices == T2::NumIndices);
+      for( auto i = 0 ; bReturnValue && i < T1::NumIndices ; i++ )
+          bReturnValue = ( lhs.dimension(i) == rhs.dimension(i) );
+      if( bReturnValue ) {
+        Eigen::Tensor<bool, 0, T1::Options> bResult = (lhs == rhs).all();
+        bReturnValue = bResult(0);
+      }
+      return bReturnValue;
+    }
+
+    template <typename T>
+    static inline typename std::enable_if<EigenIO::is_tensor<T>::value, bool>::type
+    CompareMember(const std::vector<T> &lhs, const std::vector<T> &rhs) {
+      const auto NumElements = lhs.size();
+      bool bResult = ( NumElements == rhs.size() );
+      for( auto i = 0 ; i < NumElements && bResult ; i++ )
+        bResult = CompareMember(lhs[i], rhs[i]);
+      return bResult;
+    }
+
+    template <typename T>
+    static inline typename std::enable_if<!EigenIO::is_tensor<T>::value, void>::type
+    WriteMember(std::ostream &os, const T &object) {
+      os << object;
+    }
+    
+    template <typename T>
+    static inline typename std::enable_if<EigenIO::is_tensor<T>::value, void>::type
+    WriteMember(std::ostream &os, const T &object) {
+      using Index = typename T::Index;
+      const Index NumElements{object.size()};
+      assert( NumElements > 0 );
+      Index count = 1;
+      os << "T<";
+      for( int i = 0; i < T::NumIndices; i++ ) {
+        Index dim = object.dimension(i);
+        count *= dim;
+        if( i )
+          os << ",";
+        os << dim;
+      }
+      assert( count == NumElements && "Number of elements doesn't match tensor dimensions" );
+      os << ">{";
+      const typename T::Scalar * p = object.data();
+      for( Index i = 0; i < count; i++ ) {
+        if( i )
+          os << ",";
+        os << *p++;
+      }
+      os << "}";
+    }
   };
-  
+
   // Generic writer interface //////////////////////////////////////////////////
   template <typename T>
   inline void push(Writer<T> &w, const std::string &s) {
diff --git a/Grid/serialisation/BinaryIO.h b/Grid/serialisation/BinaryIO.h
index 757753c7..bf011454 100644
--- a/Grid/serialisation/BinaryIO.h
+++ b/Grid/serialisation/BinaryIO.h
@@ -51,6 +51,8 @@ namespace Grid {
     template <typename U>
     void writeDefault(const std::string &s, const std::vector<U> &x);
     void writeDefault(const std::string &s, const char *x);
+    template <typename U>
+    void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
   private:
     std::ofstream file_;
   };
@@ -66,6 +68,8 @@ namespace Grid {
     void readDefault(const std::string &s, U &output);
     template <typename U>
     void readDefault(const std::string &s, std::vector<U> &output);
+    template <typename U>
+    void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
   private:
     std::ifstream file_;
   };
@@ -92,6 +96,27 @@ namespace Grid {
     }
   }
   
+  template <typename U>
+  void BinaryWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
+  {
+    uint64_t rank = static_cast<uint64_t>( Dimensions.size() );
+    uint64_t tmp = 1;
+    for( auto i = 0 ; i < rank ; i++ )
+      tmp *= Dimensions[i];
+    assert( tmp == NumElements && "Dimensions don't match size of data being written" );
+    // Total number of elements
+    write("", tmp);
+    // Number of dimensions
+    write("", rank);
+    // Followed by each dimension
+    for( auto i = 0 ; i < rank ; i++ ) {
+      tmp = Dimensions[i];
+      write("", tmp);
+    }
+    for( auto i = 0; i < NumElements; ++i)
+      write("", pDataRowMajor[i]);
+  }
+
   // Reader template implementation ////////////////////////////////////////////
   template <typename U>
   void BinaryReader::readDefault(const std::string &s, U &output)
@@ -114,6 +139,30 @@ namespace Grid {
       read("", output[i]);
     }
   }
+
+  template <typename U>
+  void BinaryReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
+  {
+    // Number of elements
+    uint64_t NumElements;
+    read("", NumElements);
+    // Number of dimensions
+    uint64_t rank;
+    read("", rank);
+    // Followed by each dimension
+    uint64_t count = 1;
+    dim.resize(rank);
+    uint64_t tmp;
+    for( auto i = 0 ; i < rank ; i++ ) {
+      read("", tmp);
+      dim[i] = tmp;
+      count *= tmp;
+    }
+    assert( count == NumElements && "Dimensions don't match size of data being read" );
+    buf.resize(count);
+    for( auto i = 0; i < count; ++i)
+      read("", buf[i]);
+  }
 }
 
 #endif
diff --git a/Grid/serialisation/Hdf5IO.h b/Grid/serialisation/Hdf5IO.h
index 59804240..19537599 100644
--- a/Grid/serialisation/Hdf5IO.h
+++ b/Grid/serialisation/Hdf5IO.h
@@ -3,6 +3,7 @@
 
 #include <stack>
 #include <string>
+#include <list>
 #include <vector>
 #include <H5Cpp.h>
 #include <Grid/tensors/Tensors.h>
@@ -38,6 +39,8 @@ namespace Grid
     template <typename U>
     typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
     writeDefault(const std::string &s, const std::vector<U> &x);
+    template <typename U>
+    void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
     H5NS::Group & getGroup(void);
   private:
     template <typename U>
@@ -48,7 +51,7 @@ namespace Grid
     std::vector<std::string> path_;
     H5NS::H5File             file_;
     H5NS::Group              group_;
-    unsigned int             dataSetThres_{HDF5_DEF_DATASET_THRES};
+    const unsigned int       dataSetThres_{HDF5_DEF_DATASET_THRES};
   };
   
   class Hdf5Reader: public Reader<Hdf5Reader>
@@ -66,6 +69,8 @@ namespace Grid
     template <typename U>
     typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
     readDefault(const std::string &s, std::vector<U> &x);
+    template <typename U>
+    void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
     H5NS::Group & getGroup(void);
   private:
     template <typename U>
@@ -101,6 +106,75 @@ namespace Grid
   template <>
   void Hdf5Writer::writeDefault(const std::string &s, const std::string &x);
   
+  template <typename U>
+  void Hdf5Writer::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
+  {
+    // Hdf5 needs the dimensions as hsize_t
+    const int rank = static_cast<int>(Dimensions.size());
+    std::vector<hsize_t> dim(rank);
+    for(int i = 0; i < rank; i++)
+      dim[i] = Dimensions[i];
+    // write the entire dataset to file
+    H5NS::DataSpace dataSpace(rank, dim.data());
+
+    if (NumElements > dataSetThres_)
+    {
+      // Make sure 1) each dimension; and 2) chunk size is < 4GB
+      const hsize_t MaxElements = ( sizeof( U ) == 1 ) ? 0xffffffff : 0x100000000 / sizeof( U );
+      hsize_t ElementsPerChunk = 1;
+      bool bTooBig = false;
+      for( int i = rank - 1 ; i != -1 ; i-- ) {
+        auto &d = dim[i];
+        if( bTooBig )
+          d = 1; // Chunk size is already as big as can be - remaining dimensions = 1
+        else {
+          // If individual dimension too big, reduce by prime factors if possible
+          while( d > MaxElements && ( d & 1 ) == 0 )
+            d >>= 1;
+          const char ErrorMsg[] = " dimension > 4GB and not divisible by 2^n. "
+                                  "Hdf5IO chunk size will be inefficient. NB Serialisation is not intended for large datasets - please consider alternatives.";
+          if( d > MaxElements ) {
+            std::cout << GridLogWarning << "Individual" << ErrorMsg << std::endl;
+            hsize_t quotient = d / MaxElements;
+            if( d % MaxElements )
+              quotient++;
+            d /= quotient;
+          }
+          // Now make sure overall size is not too big
+          hsize_t OverflowCheck = ElementsPerChunk;
+          ElementsPerChunk *= d;
+          assert( OverflowCheck == ElementsPerChunk / d && "Product of dimensions overflowed hsize_t" );
+          // If product of dimensions too big, reduce by prime factors
+          while( ElementsPerChunk > MaxElements && ( ElementsPerChunk & 1 ) == 0 ) {
+            bTooBig = true;
+            d >>= 1;
+            ElementsPerChunk >>= 1;
+          }
+          if( ElementsPerChunk > MaxElements ) {
+            std::cout << GridLogWarning << "Product of" << ErrorMsg << std::endl;
+            hsize_t quotient = ElementsPerChunk / MaxElements;
+            if( ElementsPerChunk % MaxElements )
+              quotient++;
+            d /= quotient;
+            ElementsPerChunk /= quotient;
+          }
+        }
+      }
+      H5NS::DataSet           dataSet;
+      H5NS::DSetCreatPropList plist;
+      plist.setChunk(rank, dim.data());
+      plist.setFletcher32();
+      dataSet = group_.createDataSet(s, Hdf5Type<U>::type(), dataSpace, plist);
+      dataSet.write(pDataRowMajor, Hdf5Type<U>::type());
+    }
+    else
+    {
+      H5NS::Attribute attribute;
+      attribute = group_.createAttribute(s, Hdf5Type<U>::type(), dataSpace);
+      attribute.write(Hdf5Type<U>::type(), pDataRowMajor);
+    }
+  }
+
   template <typename U>
   typename std::enable_if<element<std::vector<U>>::is_number, void>::type
   Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
@@ -110,34 +184,11 @@ namespace Grid
     
     // flatten the vector and getting dimensions
     Flatten<std::vector<U>> flat(x);
-    std::vector<hsize_t> dim;
+    std::vector<size_t> dim;
     const auto           &flatx = flat.getFlatVector();
-    
     for (auto &d: flat.getDim())
-    {
       dim.push_back(d);
-    }
-    
-    // write to file
-    H5NS::DataSpace dataSpace(dim.size(), dim.data());
-    
-    if (flatx.size() > dataSetThres_)
-    {
-      H5NS::DataSet           dataSet;
-      H5NS::DSetCreatPropList plist;
-      
-      plist.setChunk(dim.size(), dim.data());
-      plist.setFletcher32();
-      dataSet = group_.createDataSet(s, Hdf5Type<Element>::type(), dataSpace, plist);
-      dataSet.write(flatx.data(), Hdf5Type<Element>::type());
-    }
-    else
-    {
-      H5NS::Attribute attribute;
-      
-      attribute = group_.createAttribute(s, Hdf5Type<Element>::type(), dataSpace);
-      attribute.write(Hdf5Type<Element>::type(), flatx.data());
-    }
+    writeMultiDim<Element>(s, dim, &flatx[0], flatx.size());
   }
   
   template <typename U>
@@ -173,10 +224,9 @@ namespace Grid
   
   template <>
   void Hdf5Reader::readDefault(const std::string &s, std::string &x);
-  
+
   template <typename U>
-  typename std::enable_if<element<std::vector<U>>::is_number, void>::type
-  Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
+  void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
   {
     // alias to element type
     typedef typename element<std::vector<U>>::type Element;
@@ -184,7 +234,6 @@ namespace Grid
     // read the dimensions
     H5NS::DataSpace       dataSpace;
     std::vector<hsize_t>  hdim;
-    std::vector<size_t>   dim;
     hsize_t               size = 1;
     
     if (group_.attrExists(s))
@@ -204,8 +253,8 @@ namespace Grid
     }
     
     // read the flat vector
-    std::vector<Element> buf(size);
-
+    buf.resize(size);
+    
     if (size > dataSetThres_)
     {
       H5NS::DataSet dataSet;
@@ -220,7 +269,19 @@ namespace Grid
       attribute = group_.openAttribute(s);
       attribute.read(Hdf5Type<Element>::type(), buf.data());
     }
-    
+  }
+
+  template <typename U>
+  typename std::enable_if<element<std::vector<U>>::is_number, void>::type
+  Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
+  {
+    // alias to element type
+    typedef typename element<std::vector<U>>::type Element;
+
+    std::vector<size_t>   dim;
+    std::vector<Element>  buf;
+    readMultiDim( s, buf, dim );
+
     // reconstruct the multidimensional vector
     Reconstruct<std::vector<U>> r(buf, dim);
     
diff --git a/Grid/serialisation/MacroMagic.h b/Grid/serialisation/MacroMagic.h
index 95cd1c3c..ce305673 100644
--- a/Grid/serialisation/MacroMagic.h
+++ b/Grid/serialisation/MacroMagic.h
@@ -109,8 +109,8 @@ THE SOFTWARE.
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 #define GRID_MACRO_MEMBER(A,B)        A B;
-#define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B));
-#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = " << obj. B << " ; " <<std::endl;
+#define GRID_MACRO_COMP_MEMBER(A,B) result = (result and CompareMember(lhs. B, rhs. B));
+#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = "; WriteMember( os, obj. B ); os << " ; " <<std::endl;
 #define GRID_MACRO_READ_MEMBER(A,B) Grid::read(RD,#B,obj. B);
 #define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B);
 
diff --git a/Grid/serialisation/TextIO.h b/Grid/serialisation/TextIO.h
index ada1aaf6..a6f2d4cf 100644
--- a/Grid/serialisation/TextIO.h
+++ b/Grid/serialisation/TextIO.h
@@ -51,6 +51,8 @@ namespace Grid
     void writeDefault(const std::string &s, const U &x);
     template <typename U>
     void writeDefault(const std::string &s, const std::vector<U> &x);
+    template <typename U>
+    void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
   private:
     void indent(void);
   private:
@@ -69,6 +71,8 @@ namespace Grid
     void readDefault(const std::string &s, U &output);
     template <typename U>
     void readDefault(const std::string &s, std::vector<U> &output);
+    template <typename U>
+    void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
   private:
     void checkIndent(void);
   private:
@@ -95,7 +99,18 @@ namespace Grid
       write(s, x[i]);
     }
   }
-  
+
+  template <typename U>
+  void TextWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
+  {
+    uint64_t Rank = Dimensions.size();
+    write(s, Rank);
+    for( uint64_t d : Dimensions )
+      write(s, d);
+    while( NumElements-- )
+      write(s, *pDataRowMajor++);
+  }
+
   // Reader template implementation ////////////////////////////////////////////
   template <typename U>
   void TextReader::readDefault(const std::string &s, U &output)
@@ -121,6 +136,23 @@ namespace Grid
       read("", output[i]);
     }
   }
+
+  template <typename U>
+  void TextReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
+  {
+    const char sz[] = "";
+    uint64_t Rank;
+    read(sz, Rank);
+    dim.resize( Rank );
+    size_t NumElements = 1;
+    for( auto &d : dim ) {
+      read(sz, d);
+      NumElements *= d;
+    }
+    buf.resize( NumElements );
+    for( auto &x : buf )
+      read(s, x);
+  }
 }
 
 #endif
diff --git a/Grid/serialisation/VectorUtils.h b/Grid/serialisation/VectorUtils.h
index b6b95c10..a5a73992 100644
--- a/Grid/serialisation/VectorUtils.h
+++ b/Grid/serialisation/VectorUtils.h
@@ -1,3 +1,32 @@
+/*************************************************************************************
+ 
+ Grid physics library, www.github.com/paboyle/Grid
+ 
+ Source file: ./Grid/serialisation/VectorUtils.h
+ 
+ Copyright (C) 2015
+ 
+ Author: Antonin Portelli <antonin.portelli@me.com>
+ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+ Author: paboyle <paboyle@ph.ed.ac.uk>
+ 
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ 
+ See the full license in the file "LICENSE" in the top level distribution directory
+ *************************************************************************************/
+/*  END LEGAL */
 #ifndef GRID_SERIALISATION_VECTORUTILS_H
 #define GRID_SERIALISATION_VECTORUTILS_H
 
@@ -53,6 +82,17 @@ namespace Grid {
     return os;
   }
   
+  // std::vector<std:vector<...>> nested to specified Rank //////////////////////////////////
+  template<typename T, unsigned int Rank>
+  struct NestedStdVector {
+    typedef typename std::vector<typename NestedStdVector<T, Rank - 1>::type> type;
+  };
+  
+  template<typename T>
+  struct NestedStdVector<T,0> {
+    typedef T type;
+  };
+  
   // Grid scalar tensors to nested std::vectors //////////////////////////////////
   template <typename T>
   struct TensorToVec
@@ -436,4 +476,4 @@ std::string vecToStr(const std::vector<T> &v)
   return sstr.str();
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/Grid/serialisation/XmlIO.h b/Grid/serialisation/XmlIO.h
index a26a33c5..40d5f2bb 100644
--- a/Grid/serialisation/XmlIO.h
+++ b/Grid/serialisation/XmlIO.h
@@ -57,6 +57,8 @@ namespace Grid
     void writeDefault(const std::string &s, const U &x);
     template <typename U>
     void writeDefault(const std::string &s, const std::vector<U> &x);
+    template <typename U>
+    void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
     std::string docString(void);
     std::string string(void);
   private:
@@ -79,6 +81,8 @@ namespace Grid
     void readDefault(const std::string &s, U &output);
     template <typename U>
     void readDefault(const std::string &s, std::vector<U> &output);
+    template <typename U>
+    void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
     void readCurrentSubtree(std::string &s);
   private:
     void checkParse(const pugi::xml_parse_result &result, const std::string name);
@@ -122,13 +126,45 @@ namespace Grid
   void XmlWriter::writeDefault(const std::string &s, const std::vector<U> &x)
   {
     push(s);
-    for (auto &x_i: x)
+    for( auto &u : x )
     {
-      write("elem", x_i);
+      write("elem", u);
     }
     pop();
   }
-  
+
+  template <typename U>
+  void XmlWriter::writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements)
+  {
+    push(s);
+    size_t count = 1;
+    const int Rank = static_cast<int>( Dimensions.size() );
+    write("rank", Rank );
+    std::vector<size_t> MyIndex( Rank );
+    for( auto d : Dimensions ) {
+      write("dim", d);
+      count *= d;
+    }
+    assert( count == NumElements && "XmlIO : element count doesn't match dimensions" );
+    static const char sName[] = "tensor";
+    for( int i = 0 ; i < Rank ; i++ ) {
+      MyIndex[i] = 0;
+      push(sName);
+    }
+    while (NumElements--) {
+      write("elem", *pDataRowMajor++);
+      int i;
+      for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == Dimensions[i] ; i-- )
+        MyIndex[i] = 0;
+      int Rollover = Rank - 1 - i;
+      for( i = 0 ; i < Rollover ; i++ )
+        pop();
+      for( i = 0 ; NumElements && i < Rollover ; i++ )
+        push(sName);
+    }
+    pop();
+  }
+
   // Reader template implementation ////////////////////////////////////////////
   template <typename U>
   void XmlReader::readDefault(const std::string &s, U &output)
@@ -145,25 +181,66 @@ namespace Grid
   template <typename U>
   void XmlReader::readDefault(const std::string &s, std::vector<U> &output)
   {
-    std::string    buf;
-    unsigned int   i = 0;
-    
     if (!push(s))
     {
       std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
       std::cout << std::endl;
-
-      return; 
+    } else {
+      for(unsigned int i = 0; node_.child("elem"); )
+      {
+        output.resize(i + 1);
+        read("elem", output[i++]);
+        node_.child("elem").set_name("elem-done");
+      }
+      pop();
+    }
+  }
+
+  template <typename U>
+  void XmlReader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
+  {
+    if (!push(s))
+    {
+      std::cout << GridLogWarning << "XML: cannot open node '" << s << "'";
+      std::cout << std::endl;
+    } else {
+      static const char sName[] = "tensor";
+      static const char sNameDone[] = "tensor-done";
+      int Rank;
+      read("rank", Rank);
+      dim.resize( Rank );
+      size_t NumElements = 1;
+      for( auto &d : dim )
+      {
+        read("dim", d);
+        node_.child("dim").set_name("dim-done");
+        NumElements *= d;
+      }
+      buf.resize( NumElements );
+      std::vector<size_t> MyIndex( Rank );
+      for( int i = 0 ; i < Rank ; i++ ) {
+        MyIndex[i] = 0;
+        push(sName);
+      }
+
+      for( auto &x : buf )
+      {
+        NumElements--;
+        read("elem", x);
+        node_.child("elem").set_name("elem-done");
+        int i;
+        for( i = Rank - 1 ; i != -1 && ++MyIndex[i] == dim[i] ; i-- )
+          MyIndex[i] = 0;
+        int Rollover = Rank - 1 - i;
+        for( i = 0 ; i < Rollover ; i++ ) {
+          node_.set_name(sNameDone);
+          pop();
+        }
+        for( i = 0 ; NumElements && i < Rollover ; i++ )
+          push(sName);
+      }
+      pop();
     }
-    while (node_.child("elem"))
-    {
-      output.resize(i + 1);
-      read("elem", output[i]);
-      node_.child("elem").set_name("elem-done");
-      i++;
-    }
-    pop();
   }
-  
 }
 #endif
diff --git a/Grid/simd/Grid_vector_types.h b/Grid/simd/Grid_vector_types.h
index c67e74cb..707af211 100644
--- a/Grid/simd/Grid_vector_types.h
+++ b/Grid/simd/Grid_vector_types.h
@@ -10,6 +10,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Guido Cossu <cossu@iroiro-pc.kek.jp>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: neo <cossu@post.kek.jp>
+Author: Michael Marshall <michael.marshall@ed.ac.au>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -89,17 +90,25 @@ template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<st
 ////////////////////////////////////////////////////////
 // Check for complexity with type traits
 template <typename T> struct is_complex : public std::false_type {};
-template <> struct is_complex<std::complex<double> > : public std::true_type {};
-template <> struct is_complex<std::complex<float> > : public std::true_type {};
+template <> struct is_complex<ComplexD> : public std::true_type {};
+template <> struct is_complex<ComplexF> : public std::true_type {};
 
-template <typename T>              using IfReal    = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
+template<typename T, typename V=void> struct is_real : public std::false_type {};
+template<typename T> struct is_real<T, typename std::enable_if<std::is_floating_point<T>::value,
+  void>::type> : public std::true_type {};
+
+template<typename T, typename V=void> struct is_integer : public std::false_type {};
+template<typename T> struct is_integer<T, typename std::enable_if<std::is_integral<T>::value,
+  void>::type> : public std::true_type {};
+  
+template <typename T>              using IfReal    = Invoke<std::enable_if<is_real<T>::value, int> >;
 template <typename T>              using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
-template <typename T>              using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
+template <typename T>              using IfInteger = Invoke<std::enable_if<is_integer<T>::value, int> >;
 template <typename T1,typename T2> using IfSame    = Invoke<std::enable_if<std::is_same<T1,T2>::value, int> >;
 
-template <typename T>              using IfNotReal    = Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
+template <typename T>              using IfNotReal    = Invoke<std::enable_if<!is_real<T>::value, int> >;
 template <typename T>              using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
-template <typename T>              using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
+template <typename T>              using IfNotInteger = Invoke<std::enable_if<!is_integer<T>::value, int> >;
 template <typename T1,typename T2> using IfNotSame    = Invoke<std::enable_if<!std::is_same<T1,T2>::value, int> >;
 
 ////////////////////////////////////////////////////////
@@ -857,8 +866,10 @@ template <typename T>
 struct is_simd : public std::false_type {};
 template <> struct is_simd<vRealF>     : public std::true_type {};
 template <> struct is_simd<vRealD>     : public std::true_type {};
+template <> struct is_simd<vRealH>     : public std::true_type {};
 template <> struct is_simd<vComplexF>  : public std::true_type {};
 template <> struct is_simd<vComplexD>  : public std::true_type {};
+template <> struct is_simd<vComplexH>  : public std::true_type {};
 template <> struct is_simd<vInteger>   : public std::true_type {};
 
 template <typename T> using IfSimd    = Invoke<std::enable_if<is_simd<T>::value, int> >;
diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h
index c7f868db..d59640df 100644
--- a/Grid/tensors/Tensor_class.h
+++ b/Grid/tensors/Tensor_class.h
@@ -5,6 +5,7 @@ Copyright (C) 2015
 
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Michael Marshall <michael.marshall@ed.ac.au>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -42,27 +43,26 @@ namespace Grid {
 //
 class GridTensorBase {};
 
+// Too late to remove these traits from Grid Tensors, so inherit from GridTypeMapper
+#define GridVector_CopyTraits \
+  using element = vtype; \
+  using scalar_type     = typename Traits::scalar_type; \
+  using vector_type     = typename Traits::vector_type; \
+  using vector_typeD    = typename Traits::vector_typeD; \
+  using tensor_reduced  = typename Traits::tensor_reduced; \
+  using scalar_object   = typename Traits::scalar_object; \
+  using Complexified    = typename Traits::Complexified; \
+  using Realified       = typename Traits::Realified; \
+  using DoublePrecision = typename Traits::DoublePrecision; \
+  static constexpr int TensorLevel = Traits::TensorLevel
+
 template <class vtype>
 class iScalar {
  public:
   vtype _internal;
 
-  typedef vtype element;
-  typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
-  typedef typename GridTypeMapper<vtype>::vector_type vector_type;
-  typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
-  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
-  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
-  typedef iScalar<tensor_reduced_v> tensor_reduced;
-  typedef iScalar<recurse_scalar_object> scalar_object;
-  // substitutes a real or complex version with same tensor structure
-  typedef iScalar<typename GridTypeMapper<vtype>::Complexified> Complexified;
-  typedef iScalar<typename GridTypeMapper<vtype>::Realified> Realified;
-
-  // get double precision version
-  typedef iScalar<typename GridTypeMapper<vtype>::DoublePrecision> DoublePrecision;
-  
-  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
+  using Traits = GridTypeMapper<iScalar<vtype> >;
+  GridVector_CopyTraits;
 
   // Scalar no action
   //  template<int Level> using tensor_reduce_level = typename
@@ -173,7 +173,10 @@ class iScalar {
     return stream;
   };
 
-
+  strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(&_internal); }
+  strong_inline       scalar_type * begin()       { return reinterpret_cast<      scalar_type *>(&_internal); }
+  strong_inline const scalar_type * end()   const { return begin() + Traits::count; }
+  strong_inline       scalar_type * end()         { return begin() + Traits::count; }
 };
 ///////////////////////////////////////////////////////////
 // Allows to turn scalar<scalar<scalar<double>>>> back to double.
@@ -194,22 +197,9 @@ class iVector {
  public:
   vtype _internal[N];
 
-  typedef vtype element;
-  typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
-  typedef typename GridTypeMapper<vtype>::vector_type vector_type;
-  typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
-  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
-  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
-  typedef iScalar<tensor_reduced_v> tensor_reduced;
-  typedef iVector<recurse_scalar_object, N> scalar_object;
+  using Traits = GridTypeMapper<iVector<vtype, N> >;
+  GridVector_CopyTraits;
 
-  // substitutes a real or complex version with same tensor structure
-  typedef iVector<typename GridTypeMapper<vtype>::Complexified, N> Complexified;
-  typedef iVector<typename GridTypeMapper<vtype>::Realified, N> Realified;
-
-  // get double precision version
-  typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
-  
   template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type
                          * = nullptr>
   strong_inline auto operator=(T arg) -> iVector<vtype, N> {
@@ -218,7 +208,6 @@ class iVector {
     return *this;
   }
 
-  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
   iVector(const Zero &z) { *this = zero; };
   iVector() = default;
   /*
@@ -303,6 +292,11 @@ class iVector {
   //    strong_inline vtype && operator ()(int i) {
   //      return _internal[i];
   //    }
+
+  strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(_internal); }
+  strong_inline       scalar_type * begin()       { return reinterpret_cast<      scalar_type *>(_internal); }
+  strong_inline const scalar_type * end()   const { return begin() + Traits::count; }
+  strong_inline       scalar_type * end()         { return begin() + Traits::count; }
 };
 
 template <class vtype, int N>
@@ -310,25 +304,8 @@ class iMatrix {
  public:
   vtype _internal[N][N];
 
-  typedef vtype element;
-  typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
-  typedef typename GridTypeMapper<vtype>::vector_type vector_type;
-  typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
-  typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
-  typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
-
-  // substitutes a real or complex version with same tensor structure
-  typedef iMatrix<typename GridTypeMapper<vtype>::Complexified, N> Complexified;
-  typedef iMatrix<typename GridTypeMapper<vtype>::Realified, N> Realified;
-
-  // get double precision version
-  typedef iMatrix<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
-  
-  // Tensor removal
-  typedef iScalar<tensor_reduced_v> tensor_reduced;
-  typedef iMatrix<recurse_scalar_object, N> scalar_object;
-
-  enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
+  using Traits = GridTypeMapper<iMatrix<vtype, N> >;
+  GridVector_CopyTraits;
 
   iMatrix(const Zero &z) { *this = zero; };
   iMatrix() = default;
@@ -458,6 +435,11 @@ class iMatrix {
   //  strong_inline vtype && operator ()(int i,int j) {
   //    return _internal[i][j];
   //  }
+
+  strong_inline const scalar_type * begin() const { return reinterpret_cast<const scalar_type *>(_internal[0]); }
+  strong_inline       scalar_type * begin()       { return reinterpret_cast<      scalar_type *>(_internal[0]); }
+  strong_inline const scalar_type * end()   const { return begin() + Traits::count; }
+  strong_inline       scalar_type * end()         { return begin() + Traits::count; }
 };
 
 template <class v>
@@ -480,6 +462,3 @@ void vprefetch(const iMatrix<v, N> &vv) {
 }
 }
 #endif
-
-
-
diff --git a/Grid/tensors/Tensor_traits.h b/Grid/tensors/Tensor_traits.h
index c1ef397a..45a1d807 100644
--- a/Grid/tensors/Tensor_traits.h
+++ b/Grid/tensors/Tensor_traits.h
@@ -5,6 +5,7 @@
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: Christopher Kelly <ckelly@phys.columbia.edu>
+Author: Michael Marshall <michael.marshall@ed.ac.au>
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
@@ -26,6 +27,17 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
 
 namespace Grid {
 
+  // Forward declarations
+  template<class T>        class iScalar;
+  template<class T, int N> class iVector;
+  template<class T, int N> class iMatrix;
+
+  // These are the Grid tensors
+  template<typename T>     struct isGridTensor                : public std::false_type { static constexpr bool notvalue = true; };
+  template<class T>        struct isGridTensor<iScalar<T>>    : public std::true_type  { static constexpr bool notvalue = false; };
+  template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type  { static constexpr bool notvalue = false; };
+  template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type  { static constexpr bool notvalue = false; };
+
 //////////////////////////////////////////////////////////////////////////////////
 // Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
 // Use of a helper class like this allows us to template specialise and "dress"
@@ -40,25 +52,26 @@ namespace Grid {
 // to study C++11's type_traits.h file. (std::enable_if<isGridTensorType<vtype> >)
 //
 //////////////////////////////////////////////////////////////////////////////////
-  
-  template <class T> class GridTypeMapper {
-  public:
-    typedef typename T::scalar_type scalar_type;
-    typedef typename T::vector_type vector_type;
-    typedef typename T::vector_typeD vector_typeD;
-    typedef typename T::tensor_reduced tensor_reduced;
-    typedef typename T::scalar_object scalar_object;
-    typedef typename T::Complexified Complexified;
-    typedef typename T::Realified Realified;
-    typedef typename T::DoublePrecision DoublePrecision;
-    enum { TensorLevel = T::TensorLevel };
+
+  // This saves repeating common properties for supported Grid Scalar types
+  // TensorLevel    How many nested grid tensors
+  // Rank           Rank of the grid tensor
+  // count          Total number of elements, i.e. product of dimensions
+  // Dimension(dim) Size of dimension dim
+  struct GridTypeMapper_Base {
+    static constexpr int TensorLevel = 0;
+    static constexpr int Rank = 0;
+    static constexpr std::size_t count = 1;
+    static constexpr int Dimension(int dim) { return 0; }
   };
 
 //////////////////////////////////////////////////////////////////////////////////
 // Recursion stops with these template specialisations
 //////////////////////////////////////////////////////////////////////////////////
-  template<> class GridTypeMapper<RealF> {
-  public:
+
+  template<typename T> struct GridTypeMapper {};
+
+  template<> struct GridTypeMapper<RealF> : public GridTypeMapper_Base {
     typedef RealF scalar_type;
     typedef RealF vector_type;
     typedef RealD vector_typeD;
@@ -67,10 +80,8 @@ namespace Grid {
     typedef ComplexF Complexified;
     typedef RealF Realified;
     typedef RealD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<RealD> {
-  public:
+  template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
     typedef RealD scalar_type;
     typedef RealD vector_type;
     typedef RealD vector_typeD;
@@ -79,10 +90,8 @@ namespace Grid {
     typedef ComplexD Complexified;
     typedef RealD Realified;
     typedef RealD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<ComplexF> {
-  public:
+  template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
     typedef ComplexF scalar_type;
     typedef ComplexF vector_type;
     typedef ComplexD vector_typeD;
@@ -91,10 +100,8 @@ namespace Grid {
     typedef ComplexF Complexified;
     typedef RealF Realified;
     typedef ComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<ComplexD> {
-  public:
+  template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
     typedef ComplexD scalar_type;
     typedef ComplexD vector_type;
     typedef ComplexD vector_typeD;
@@ -103,10 +110,8 @@ namespace Grid {
     typedef ComplexD Complexified;
     typedef RealD Realified;
     typedef ComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<Integer> {
-  public:
+  template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
     typedef Integer scalar_type;
     typedef Integer vector_type;
     typedef Integer vector_typeD;
@@ -115,11 +120,9 @@ namespace Grid {
     typedef void Complexified;
     typedef void Realified;
     typedef void DoublePrecision;
-    enum { TensorLevel = 0 };
   };
 
-  template<> class GridTypeMapper<vRealF> {
-  public:
+  template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
     typedef RealF  scalar_type;
     typedef vRealF vector_type;
     typedef vRealD vector_typeD;
@@ -128,10 +131,8 @@ namespace Grid {
     typedef vComplexF Complexified;
     typedef vRealF Realified;
     typedef vRealD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<vRealD> {
-  public:
+  template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
     typedef RealD  scalar_type;
     typedef vRealD vector_type;
     typedef vRealD vector_typeD;
@@ -140,10 +141,20 @@ namespace Grid {
     typedef vComplexD Complexified;
     typedef vRealD Realified;
     typedef vRealD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<vComplexH> {
-  public:
+  template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
+    // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
+    typedef RealF  scalar_type;
+    typedef vRealH vector_type;
+    typedef vRealD vector_typeD;
+    typedef vRealH tensor_reduced;
+    typedef RealF  scalar_object;
+    typedef vComplexH Complexified;
+    typedef vRealH Realified;
+    typedef vRealD DoublePrecision;
+  };
+  template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
+    // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
     typedef ComplexF  scalar_type;
     typedef vComplexH vector_type;
     typedef vComplexD vector_typeD;
@@ -152,10 +163,8 @@ namespace Grid {
     typedef vComplexH Complexified;
     typedef vRealH Realified;
     typedef vComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<vComplexF> {
-  public:
+  template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
     typedef ComplexF  scalar_type;
     typedef vComplexF vector_type;
     typedef vComplexD vector_typeD;
@@ -164,10 +173,8 @@ namespace Grid {
     typedef vComplexF Complexified;
     typedef vRealF Realified;
     typedef vComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<vComplexD> {
-  public:
+  template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
     typedef ComplexD  scalar_type;
     typedef vComplexD vector_type;
     typedef vComplexD vector_typeD;
@@ -176,10 +183,8 @@ namespace Grid {
     typedef vComplexD Complexified;
     typedef vRealD Realified;
     typedef vComplexD DoublePrecision;
-    enum { TensorLevel = 0 };
   };
-  template<> class GridTypeMapper<vInteger> {
-  public:
+  template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
     typedef  Integer scalar_type;
     typedef vInteger vector_type;
     typedef vInteger vector_typeD;
@@ -188,57 +193,52 @@ namespace Grid {
     typedef void Complexified;
     typedef void Realified;
     typedef void DoublePrecision;
-    enum { TensorLevel = 0 };
   };
 
-  // First some of my own traits
-  template<typename T> struct isGridTensor {
-    static const bool value = true;
-    static const bool notvalue = false;
+#define GridTypeMapper_RepeatedTypes \
+  using BaseTraits   = GridTypeMapper<T>; \
+  using scalar_type  = typename BaseTraits::scalar_type; \
+  using vector_type  = typename BaseTraits::vector_type; \
+  using vector_typeD = typename BaseTraits::vector_typeD; \
+  static constexpr int TensorLevel = BaseTraits::TensorLevel + 1
+
+  template<typename T> struct GridTypeMapper<iScalar<T>> {
+    GridTypeMapper_RepeatedTypes;
+    using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>;
+    using scalar_object   = iScalar<typename BaseTraits::scalar_object>;
+    using Complexified    = iScalar<typename BaseTraits::Complexified>;
+    using Realified       = iScalar<typename BaseTraits::Realified>;
+    using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
+    static constexpr int Rank = BaseTraits::Rank + 1;
+    static constexpr std::size_t count = BaseTraits::count;
+    static constexpr int Dimension(int dim) {
+      return ( dim == 0 ) ? 1 : BaseTraits::Dimension(dim - 1); }
   };
-  template<> struct isGridTensor<int > {
-    static const bool value = false;
-    static const bool notvalue = true;
+
+  template<typename T, int N> struct GridTypeMapper<iVector<T, N>> {
+    GridTypeMapper_RepeatedTypes;
+    using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>;
+    using scalar_object   = iVector<typename BaseTraits::scalar_object,   N>;
+    using Complexified    = iVector<typename BaseTraits::Complexified,    N>;
+    using Realified       = iVector<typename BaseTraits::Realified,       N>;
+    using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
+    static constexpr int Rank = BaseTraits::Rank + 1;
+    static constexpr std::size_t count = BaseTraits::count * N;
+    static constexpr int Dimension(int dim) {
+      return ( dim == 0 ) ? N : BaseTraits::Dimension(dim - 1); }
   };
-  template<> struct isGridTensor<RealD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<RealF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<ComplexD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<ComplexF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<Integer > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vRealD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vRealF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vComplexD > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vComplexF > {
-    static const bool value = false;
-    static const bool notvalue = true;
-  };
-  template<> struct isGridTensor<vInteger > {
-    static const bool value = false;
-    static const bool notvalue = true;
+
+  template<typename T, int N> struct GridTypeMapper<iMatrix<T, N>> {
+    GridTypeMapper_RepeatedTypes;
+    using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>;
+    using scalar_object   = iMatrix<typename BaseTraits::scalar_object,   N>;
+    using Complexified    = iMatrix<typename BaseTraits::Complexified,    N>;
+    using Realified       = iMatrix<typename BaseTraits::Realified,       N>;
+    using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
+    static constexpr int Rank = BaseTraits::Rank + 2;
+    static constexpr std::size_t count = BaseTraits::count * N * N;
+    static constexpr int Dimension(int dim) {
+      return ( dim == 0 || dim == 1 ) ? N : BaseTraits::Dimension(dim - 2); }
   };
 
   // Match the index
@@ -263,20 +263,13 @@ namespace Grid {
     typedef T type;
   };
   
-  //Query if a tensor or Lattice<Tensor> is SIMD vector or scalar
-  template<typename T>
-  class isSIMDvectorized{
-    template<typename U>
-    static typename std::enable_if< !std::is_same< typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,   
-      typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, char>::type test(void *);
+  //Query whether a tensor or Lattice<Tensor> is SIMD vector or scalar
+  template<typename T, typename V=void> struct isSIMDvectorized : public std::false_type {};
+  template<typename U> struct isSIMDvectorized<U, typename std::enable_if< !std::is_same<
+    typename GridTypeMapper<typename getVectorType<U>::type>::scalar_type,
+    typename GridTypeMapper<typename getVectorType<U>::type>::vector_type>::value, void>::type>
+  : public std::true_type {};
 
-    template<typename U>
-    static double test(...);
-  
-  public:
-    enum {value = sizeof(test<T>(0)) == sizeof(char) };
-  };
-  
   //Get the precision of a Lattice, tensor or scalar type in units of sizeof(float)
   template<typename T>
   class getPrecision{
diff --git a/Hadrons/Application.cc b/Hadrons/Application.cc
index be2e87a1..e6fb85fa 100644
--- a/Hadrons/Application.cc
+++ b/Hadrons/Application.cc
@@ -48,28 +48,32 @@ Application::Application(void)
 {
     initLogger();
     auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim);
-    locVol_ = 1;
-    for (unsigned int d = 0; d < dim.size(); ++d)
+
+    if (dim.size())
     {
-        loc[d]  /= mpi[d];
-        locVol_ *= loc[d];
+        locVol_ = 1;
+        for (unsigned int d = 0; d < dim.size(); ++d)
+        {
+            loc[d]  /= mpi[d];
+            locVol_ *= loc[d];
+        }
+        LOG(Message) << "====== HADRONS APPLICATION INITIALISATION ======" << std::endl;
+        LOG(Message) << "** Dimensions" << std::endl;
+        LOG(Message) << "Global lattice: " << dim << std::endl;
+        LOG(Message) << "MPI partition : " << mpi << std::endl;
+        LOG(Message) << "Local lattice : " << loc << std::endl;
+        LOG(Message) << std::endl;
+        LOG(Message) << "** Default parameters (and associated C macros)" << std::endl;
+        LOG(Message) << "ASCII output precision  : " << MACOUT(DEFAULT_ASCII_PREC) << std::endl;
+        LOG(Message) << "Fermion implementation  : " << MACOUTS(FIMPLBASE) << std::endl;
+        LOG(Message) << "z-Fermion implementation: " << MACOUTS(ZFIMPLBASE) << std::endl;
+        LOG(Message) << "Scalar implementation   : " << MACOUTS(SIMPLBASE) << std::endl;
+        LOG(Message) << "Gauge implementation    : " << MACOUTS(GIMPLBASE) << std::endl;
+        LOG(Message) << "Eigenvector base size   : " 
+                    << MACOUT(HADRONS_DEFAULT_LANCZOS_NBASIS) << std::endl;
+        LOG(Message) << "Schur decomposition     : " << MACOUTS(HADRONS_DEFAULT_SCHUR) << std::endl;
+        LOG(Message) << std::endl;
     }
-    LOG(Message) << "====== HADRONS APPLICATION INITIALISATION ======" << std::endl;
-    LOG(Message) << "** Dimensions" << std::endl;
-    LOG(Message) << "Global lattice: " << dim << std::endl;
-    LOG(Message) << "MPI partition : " << mpi << std::endl;
-    LOG(Message) << "Local lattice : " << loc << std::endl;
-    LOG(Message) << std::endl;
-    LOG(Message) << "** Default parameters (and associated C macros)" << std::endl;
-    LOG(Message) << "ASCII output precision  : " << MACOUT(DEFAULT_ASCII_PREC) << std::endl;
-    LOG(Message) << "Fermion implementation  : " << MACOUTS(FIMPLBASE) << std::endl;
-    LOG(Message) << "z-Fermion implementation: " << MACOUTS(ZFIMPLBASE) << std::endl;
-    LOG(Message) << "Scalar implementation   : " << MACOUTS(SIMPLBASE) << std::endl;
-    LOG(Message) << "Gauge implementation    : " << MACOUTS(GIMPLBASE) << std::endl;
-    LOG(Message) << "Eigenvector base size   : " 
-                 << MACOUT(HADRONS_DEFAULT_LANCZOS_NBASIS) << std::endl;
-    LOG(Message) << "Schur decomposition     : " << MACOUTS(HADRONS_DEFAULT_SCHUR) << std::endl;
-    LOG(Message) << std::endl;
 }
 
 Application::Application(const Application::GlobalPar &par)
diff --git a/Hadrons/Environment.cc b/Hadrons/Environment.cc
index e7a7ac55..540b3620 100644
--- a/Hadrons/Environment.cc
+++ b/Hadrons/Environment.cc
@@ -43,15 +43,13 @@ HADRONS_ERROR_REF(ObjectDefinition, "no object with address " + std::to_string(a
 // constructor /////////////////////////////////////////////////////////////////
 Environment::Environment(void)
 {
-    dim_         = GridDefaultLatt();
-    nd_          = dim_.size();
-    createGrid<vComplex>(1);
+    dim_ = GridDefaultLatt();
+    nd_  = dim_.size();
     vol_ = 1.;
     for (auto d: dim_)
     {
         vol_ *= d;
     }
-    rng4d_.reset(new GridParallelRNG(getGrid()));
 }
 
 // grids ///////////////////////////////////////////////////////////////////////
@@ -76,8 +74,13 @@ double Environment::getVolume(void) const
 }
 
 // random number generator /////////////////////////////////////////////////////
-GridParallelRNG * Environment::get4dRng(void) const
+GridParallelRNG * Environment::get4dRng(void)
 {
+    if (rng4d_ == nullptr)
+    {
+        rng4d_.reset(new GridParallelRNG(getGrid()));
+    }
+
     return rng4d_.get();
 }
 
diff --git a/Hadrons/Environment.hpp b/Hadrons/Environment.hpp
index b77345c8..24f6b31e 100644
--- a/Hadrons/Environment.hpp
+++ b/Hadrons/Environment.hpp
@@ -113,7 +113,7 @@ public:
     unsigned int            getNd(void) const;
     double                  getVolume(void) const;
     // random number generator
-    GridParallelRNG *       get4dRng(void) const;
+    GridParallelRNG *       get4dRng(void);
     // general memory management
     void                    addObject(const std::string name,
                                       const int moduleAddress = -1);
@@ -182,7 +182,7 @@ private:
     std::map<CoarseGridKey, GridPt>     gridCoarse5d_;
     unsigned int                        nd_;
     // random number generator
-    RngPt                               rng4d_;
+    RngPt                               rng4d_{nullptr};
     // object store
     std::vector<ObjInfo>                object_;
     std::map<std::string, unsigned int> objectAddress_;
diff --git a/Hadrons/Modules/MAction/DWF.hpp b/Hadrons/Modules/MAction/DWF.hpp
index bac70ae8..21024a4f 100644
--- a/Hadrons/Modules/MAction/DWF.hpp
+++ b/Hadrons/Modules/MAction/DWF.hpp
@@ -112,8 +112,6 @@ void TDWF<FImpl>::setup(void)
                  << par().mass << ", M5= " << par().M5 << " and Ls= "
                  << par().Ls << " using gauge field '" << par().gauge << "'"
                  << std::endl;
-    LOG(Message) << "Fermion boundary conditions: " << par().boundary
-                 << std::endl;
                  
     auto &U    = envGet(GaugeField, par().gauge);
     auto &g4   = *envGetGrid(FermionField);
@@ -121,8 +119,26 @@ void TDWF<FImpl>::setup(void)
     auto &g5   = *envGetGrid(FermionField, par().Ls);
     auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
     typename DomainWallFermion<FImpl>::ImplParams implParams;
-    implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    if (!par().boundary.empty())
+    {
+        implParams.boundary_phases = strToVec<Complex>(par().boundary);
+    }
+    if (!par().twist.empty())
+    {
+        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    }
+    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
+                 << std::endl;
+    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
+                 << std::endl;
+    if (implParams.boundary_phases.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of boundary phase");
+    }
+    if (implParams.twist_n_2pi_L.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of twist");
+    }
     envCreateDerived(FMat, DomainWallFermion<FImpl>, getName(), par().Ls, U, g5,
                      grb5, g4, grb4, par().mass, par().M5, implParams);
 }
diff --git a/Hadrons/Modules/MAction/MobiusDWF.hpp b/Hadrons/Modules/MAction/MobiusDWF.hpp
index 6b960e2e..3ce02b91 100644
--- a/Hadrons/Modules/MAction/MobiusDWF.hpp
+++ b/Hadrons/Modules/MAction/MobiusDWF.hpp
@@ -112,17 +112,33 @@ void TMobiusDWF<FImpl>::setup(void)
                  << ", b= " << par().b << ", c= " << par().c
                  << " using gauge field '" << par().gauge << "'"
                  << std::endl;
-    LOG(Message) << "Fermion boundary conditions: " << par().boundary
-                 << std::endl;
-
+                 
     auto &U    = envGet(GaugeField, par().gauge);
     auto &g4   = *envGetGrid(FermionField);
     auto &grb4 = *envGetRbGrid(FermionField);
     auto &g5   = *envGetGrid(FermionField, par().Ls);
     auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
     typename MobiusFermion<FImpl>::ImplParams implParams;
-    implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    if (!par().boundary.empty())
+    {
+        implParams.boundary_phases = strToVec<Complex>(par().boundary);
+    }
+    if (!par().twist.empty())
+    {
+        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    }
+    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
+                 << std::endl;
+    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
+                 << std::endl;
+    if (implParams.boundary_phases.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of boundary phase");
+    }
+    if (implParams.twist_n_2pi_L.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of twist");
+    }
     envCreateDerived(FMat, MobiusFermion<FImpl>, getName(), par().Ls, U, g5,
                      grb5, g4, grb4, par().mass, par().M5, par().b, par().c,
                      implParams);
diff --git a/Hadrons/Modules/MAction/ScaledDWF.hpp b/Hadrons/Modules/MAction/ScaledDWF.hpp
index 8742a820..3e052225 100644
--- a/Hadrons/Modules/MAction/ScaledDWF.hpp
+++ b/Hadrons/Modules/MAction/ScaledDWF.hpp
@@ -111,8 +111,6 @@ void TScaledDWF<FImpl>::setup(void)
                  << ", scale= " << par().scale
                  << " using gauge field '" << par().gauge << "'"
                  << std::endl;
-    LOG(Message) << "Fermion boundary conditions: " << par().boundary
-                 << std::endl;
 
     auto &U    = envGet(GaugeField, par().gauge);
     auto &g4   = *envGetGrid(FermionField);
@@ -120,8 +118,26 @@ void TScaledDWF<FImpl>::setup(void)
     auto &g5   = *envGetGrid(FermionField, par().Ls);
     auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
     typename ScaledShamirFermion<FImpl>::ImplParams implParams;
-    implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    if (!par().boundary.empty())
+    {
+        implParams.boundary_phases = strToVec<Complex>(par().boundary);
+    }
+    if (!par().twist.empty())
+    {
+        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    }
+    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
+                 << std::endl;
+    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
+                 << std::endl;
+    if (implParams.boundary_phases.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of boundary phase");
+    }
+    if (implParams.twist_n_2pi_L.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of twist");
+    }
     envCreateDerived(FMat, ScaledShamirFermion<FImpl>, getName(), par().Ls, U, g5,
                      grb5, g4, grb4, par().mass, par().M5, par().scale,
                      implParams);
diff --git a/Hadrons/Modules/MAction/Wilson.hpp b/Hadrons/Modules/MAction/Wilson.hpp
index 3ce5569e..11df89c3 100644
--- a/Hadrons/Modules/MAction/Wilson.hpp
+++ b/Hadrons/Modules/MAction/Wilson.hpp
@@ -109,15 +109,31 @@ void TWilson<FImpl>::setup(void)
 {
     LOG(Message) << "Setting up Wilson fermion matrix with m= " << par().mass
                  << " using gauge field '" << par().gauge << "'" << std::endl;
-    LOG(Message) << "Fermion boundary conditions: " << par().boundary
-                 << std::endl;
                  
     auto &U      = envGet(GaugeField, par().gauge);
     auto &grid   = *envGetGrid(FermionField);
     auto &gridRb = *envGetRbGrid(FermionField);
     typename WilsonFermion<FImpl>::ImplParams implParams;
-    implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    if (!par().boundary.empty())
+    {
+        implParams.boundary_phases = strToVec<Complex>(par().boundary);
+    }
+    if (!par().twist.empty())
+    {
+        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    }
+    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
+                 << std::endl;
+    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
+                 << std::endl;
+    if (implParams.boundary_phases.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of boundary phase");
+    }
+    if (implParams.twist_n_2pi_L.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of twist");
+    }
     envCreateDerived(FMat, WilsonFermion<FImpl>, getName(), 1, U, grid, gridRb,
                      par().mass, implParams);
 }
diff --git a/Hadrons/Modules/MAction/WilsonClover.hpp b/Hadrons/Modules/MAction/WilsonClover.hpp
index a9ea8de5..d80bccdf 100644
--- a/Hadrons/Modules/MAction/WilsonClover.hpp
+++ b/Hadrons/Modules/MAction/WilsonClover.hpp
@@ -112,17 +112,34 @@ void TWilsonClover<FImpl>::setup(void)
 {
     LOG(Message) << "Setting up Wilson clover fermion matrix with m= " << par().mass
                  << " using gauge field '" << par().gauge << "'" << std::endl;
-    LOG(Message) << "Fermion boundary conditions: " << par().boundary 
-                 << std::endl;
     LOG(Message) << "Clover term csw_r: " << par().csw_r
                  << " csw_t: " << par().csw_t
                  << std::endl;
+                 
     auto &U      = envGet(GaugeField, par().gauge);
     auto &grid   = *envGetGrid(FermionField);
     auto &gridRb = *envGetRbGrid(FermionField);
     typename WilsonCloverFermion<FImpl>::ImplParams implParams;
-    implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    if (!par().boundary.empty())
+    {
+        implParams.boundary_phases = strToVec<Complex>(par().boundary);
+    }
+    if (!par().twist.empty())
+    {
+        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    }
+    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
+                 << std::endl;
+    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
+                 << std::endl;
+    if (implParams.boundary_phases.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of boundary phase");
+    }
+    if (implParams.twist_n_2pi_L.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of twist");
+    }
     envCreateDerived(FMat, WilsonCloverFermion<FImpl>, getName(), 1, U, grid,
                      gridRb, par().mass, par().csw_r, par().csw_t, 
                      par().clover_anisotropy, implParams); 
diff --git a/Hadrons/Modules/MAction/ZMobiusDWF.hpp b/Hadrons/Modules/MAction/ZMobiusDWF.hpp
index e780bd73..56efe1a2 100644
--- a/Hadrons/Modules/MAction/ZMobiusDWF.hpp
+++ b/Hadrons/Modules/MAction/ZMobiusDWF.hpp
@@ -118,10 +118,7 @@ void TZMobiusDWF<FImpl>::setup(void)
     {
         LOG(Message) << "  omega[" << i << "]= " << par().omega[i] << std::endl;
     }
-    LOG(Message) << "Fermion boundary conditions: " << par().boundary
-                 << std::endl;
 
-    env().createGrid(par().Ls);
     auto &U    = envGet(GaugeField, par().gauge);
     auto &g4   = *envGetGrid(FermionField);
     auto &grb4 = *envGetRbGrid(FermionField);
@@ -129,8 +126,26 @@ void TZMobiusDWF<FImpl>::setup(void)
     auto &grb5 = *envGetRbGrid(FermionField, par().Ls);
     auto omega = par().omega;
     typename ZMobiusFermion<FImpl>::ImplParams implParams;
-    implParams.boundary_phases = strToVec<Complex>(par().boundary);
-    implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    if (!par().boundary.empty())
+    {
+        implParams.boundary_phases = strToVec<Complex>(par().boundary);
+    }
+    if (!par().twist.empty())
+    {
+        implParams.twist_n_2pi_L   = strToVec<Real>(par().twist);
+    }
+    LOG(Message) << "Fermion boundary conditions: " << implParams.boundary_phases
+                 << std::endl;
+    LOG(Message) << "Twists: " << implParams.twist_n_2pi_L
+                 << std::endl;
+    if (implParams.boundary_phases.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of boundary phase");
+    }
+    if (implParams.twist_n_2pi_L.size() != env().getNd())
+    {
+        HADRONS_ERROR(Size, "Wrong number of twist");
+    }
     envCreateDerived(FMat, ZMobiusFermion<FImpl>, getName(), par().Ls, U, g5,
                      grb5, g4, grb4, par().mass, par().M5, omega,
                      par().b, par().c, implParams);
diff --git a/Hadrons/Modules/MGauge/GaugeFix.hpp b/Hadrons/Modules/MGauge/GaugeFix.hpp
index 87822a7e..57b1e082 100644
--- a/Hadrons/Modules/MGauge/GaugeFix.hpp
+++ b/Hadrons/Modules/MGauge/GaugeFix.hpp
@@ -7,6 +7,7 @@ Source file: Hadrons/Modules/MGauge/GaugeFix.hpp
 Copyright (C) 2015-2019
 
 Author: Antonin Portelli <antonin.portelli@me.com>
+Author: Nils Asmussen <n.asmussen@soton.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 
 This program is free software; you can redistribute it and/or modify
@@ -58,6 +59,7 @@ class TGaugeFix: public Module<GaugeFixPar>
 {
 public:
     GAUGE_TYPE_ALIASES(GImpl,);
+    typedef typename GImpl::GaugeLinkField GaugeMat;
 public:
     // constructor
     TGaugeFix(const std::string name);
@@ -94,7 +96,7 @@ std::vector<std::string> TGaugeFix<GImpl>::getInput(void)
 template <typename GImpl>
 std::vector<std::string> TGaugeFix<GImpl>::getOutput(void)
 {
-    std::vector<std::string> out = {getName()};
+    std::vector<std::string> out = {getName(), getName()+"_xform"};
     return out;
 }
 
@@ -103,6 +105,7 @@ template <typename GImpl>
 void TGaugeFix<GImpl>::setup(void)
 {
     envCreateLat(GaugeField, getName());
+    envCreateLat(GaugeMat, getName()+"_xform");
 }
 
 
@@ -116,6 +119,7 @@ void TGaugeFix<GImpl>::execute(void)
     LOG(Message) << par().gauge << std::endl;
     auto &U     = envGet(GaugeField, par().gauge);
     auto &Umu   = envGet(GaugeField, getName());
+    auto &xform = envGet(GaugeMat, getName()+"_xform");
     LOG(Message) << "Gauge Field fetched" << std::endl;
     //do we allow maxiter etc to be user set?
     Real alpha     = par().alpha;
@@ -123,8 +127,8 @@ void TGaugeFix<GImpl>::execute(void)
     Real Omega_tol = par().Omega_tol;
     Real Phi_tol   = par().Phi_tol;
     bool Fourier   = par().Fourier;
-    FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(U,alpha,maxiter,Omega_tol,Phi_tol,Fourier);
     Umu = U;
+    FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier);
     LOG(Message) << "Gauge Fixed" << std::endl;
 }
 
diff --git a/Hadrons/Utilities/HadronsXmlValidate.cc b/Hadrons/Utilities/HadronsXmlValidate.cc
new file mode 100644
index 00000000..73cf3139
--- /dev/null
+++ b/Hadrons/Utilities/HadronsXmlValidate.cc
@@ -0,0 +1,64 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Utilities/HadronsXmlValidate.cc
+
+Copyright (C) 2015-2019
+
+Author: Antonin Portelli <antonin.portelli@me.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+
+#include <Hadrons/Application.hpp>
+
+using namespace Grid;
+using namespace QCD;
+using namespace Hadrons;
+
+int main(int argc, char *argv[])
+{
+    // parse command line
+    std::string parameterFileName;
+    
+    if (argc != 2)
+    {
+        std::cerr << "usage: " << argv[0] << " <parameter file>";
+        std::cerr << std::endl;
+        std::exit(EXIT_FAILURE);
+    }
+    parameterFileName = argv[1];
+    
+    try
+    {
+        Application application(parameterFileName);
+        
+        application.parseParameterFile(parameterFileName);
+        auto &vm = VirtualMachine::getInstance();
+        vm.getModuleGraph();
+        LOG(Message) << "Application valid (check XML warnings though)" 
+                     << std::endl;
+    }
+    catch (const std::exception& e)
+    {
+        Exceptions::abort(e);
+    }
+    
+    return EXIT_SUCCESS;
+}
diff --git a/Hadrons/Utilities/Makefile.am b/Hadrons/Utilities/Makefile.am
index 4f324d6d..6c48d966 100644
--- a/Hadrons/Utilities/Makefile.am
+++ b/Hadrons/Utilities/Makefile.am
@@ -1,8 +1,11 @@
-bin_PROGRAMS = HadronsXmlRun HadronsFermionEP64To32 HadronsContractor HadronsContractorBenchmark
+bin_PROGRAMS = HadronsXmlRun HadronsXmlValidate HadronsFermionEP64To32 HadronsContractor HadronsContractorBenchmark
 
 HadronsXmlRun_SOURCES = HadronsXmlRun.cc
 HadronsXmlRun_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
 
+HadronsXmlValidate_SOURCES = HadronsXmlValidate.cc
+HadronsXmlValidate_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
+
 HadronsFermionEP64To32_SOURCES  = EigenPackCast.cc
 HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
 HadronsFermionEP64To32_LDADD    = ../libHadrons.a ../../Grid/libGrid.a
diff --git a/documentation/GridXcode/GridXcFig1.png b/documentation/GridXcode/GridXcFig1.png
new file mode 100644
index 00000000..73fd0625
Binary files /dev/null and b/documentation/GridXcode/GridXcFig1.png differ
diff --git a/documentation/GridXcode/GridXcFig2.png b/documentation/GridXcode/GridXcFig2.png
new file mode 100644
index 00000000..9b169491
Binary files /dev/null and b/documentation/GridXcode/GridXcFig2.png differ
diff --git a/documentation/GridXcode/GridXcFig3.png b/documentation/GridXcode/GridXcFig3.png
new file mode 100644
index 00000000..02595468
Binary files /dev/null and b/documentation/GridXcode/GridXcFig3.png differ
diff --git a/documentation/GridXcode/GridXcFig4.png b/documentation/GridXcode/GridXcFig4.png
new file mode 100644
index 00000000..5abd150b
Binary files /dev/null and b/documentation/GridXcode/GridXcFig4.png differ
diff --git a/documentation/GridXcode/readme.md b/documentation/GridXcode/readme.md
new file mode 100644
index 00000000..b75d9323
--- /dev/null
+++ b/documentation/GridXcode/readme.md
@@ -0,0 +1,422 @@
+# Using Xcode with Grid on Mac OS
+
+This guide explains how to use Xcode as an IDE on Mac OS.
+
+# Initial setup
+
+For first time setup of the Xcode and Grid build environment on Mac OS, you will need to do the following, in this order:
+
+1. Install Xcode and the Xcode command-line utilities
+2. Set Grid environment variables
+3. Install and build Open MPI ***optional***
+4. Install and build Grid pre-requisites
+5. Install, Configure and Build Grid
+
+Apple's [Xcode website][Xcode] is the go-to reference for 1, and the definitive reference for 4 and 5 is the [Grid Documentation][GridDoc].
+
+[Xcode]: https://developer.apple.com/xcode/
+[GridDoc]: https://github.com/paboyle/Grid/blob/develop/documentation/Grid.pdf
+
+The following sections explain these steps in more detail
+
+## 1. Install Xcode and the Xcode command-line utilities
+
+See Apple's [Xcode website][Xcode] for instructions on installing Xcode.
+
+Once Xcode is installed, install the Xcode command-line utilities using:
+
+    xcode-select --install
+
+*NB: the screenshots from this guide were generated from Xcode 10.1.*
+
+## 2. Set Grid environment variables
+
+To make sure we can share Xcode projects via git and have them work without requiring modification, we will define Grid environment variables. To make sure these environment variables will be available to the Xcode build system, issue the following command:
+
+    defaults write com.apple.dt.Xcode UseSanitizedBuildSystemEnvironment -bool NO
+
+These are the environment variables we will define for Grid:
+
+Variable | Typical Value | Use
+--- | --- | ---
+`Grid` | `/Users/user_id/src/Grid` | Path to grid source
+`GridPre` | `/Users/user_id/bin` | Path to install directory containing grid pre-requisites built from source
+`GridPkg` | **MacPorts**=`/opt/local`, **Homebrew**=`/usr/local` | Path to package manager install directory
+
+Choose either of the following ways to do this, and when you're done, log out and in again. To check these have been set:
+
+    printenv|grep -i grid
+
+### Method 1 -- Apple Script
+
+* Start *Script Editor* (cmd-space, *script editor*)
+* Click on *New Document*. Paste the following into the new script, editing the paths appropriately (just replace `user_id` with your *user_id* if you are unsure):
+
+```apple script
+do shell script "launchctl setenv Grid $HOME/src/Grid
+launchctl setenv GridPre $HOME/bin
+launchctl setenv GridPkg /opt/local"
+```
+
+* Save the script inside `~/Applications` and give it the name `GridEnv.app`.
+* Open `System Preferences`, `Users & Groups`
+* Click on `Login Items`
+* Click the plus sign to add a new login item
+* Select the `~/Applications` folder and select `GridEnv.app`
+
+Log out and in again.
+
+### Method 2 -- `environment.plist`
+
+Make the file `environment.plist` in `~/Library/LaunchAgents` with the following contents, editing the paths appropriately (just replace `user_id` with your *user_id* if you are unsure):
+
+```html
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+<key>Label</key>
+<string>Grid.startup</string>
+<key>ProgramArguments</key>
+<array>
+<string>sh</string>
+<string>-c</string>
+<string>launchctl setenv Grid $HOME/src/Grid
+launchctl setenv GridPre $HOME/bin
+launchctl setenv GridPkg /opt/local</string>
+
+</array>
+<key>RunAtLoad</key>
+<true/>
+</dict>
+</plist>
+```
+
+## 3. Install and build Open MPI -- ***optional***
+
+Download the latest version of [Open MPI][OMPI] version 3.1 (I used 3.1.3).
+
+NB: Grid does not have any dependencies on fortran, however many standard scientific packages do, so you may as well download GNU fortran (e.g. MacPorts ``gfortran`` package) and build Open MPI like so:
+
+[OMPI]: https://www.open-mpi.org/software/ompi/v3.1/
+
+    ../configure CC=clang CXX=clang++ F77=gfortran FC=gfortran CXXFLAGS=-g --prefix=$GridPre/openmpi-3.1.3
+    make -j 4 all install
+
+(If you don't want to bother with fortran bindings, just don't include the F77 and FC flags)
+
+## 4. Install and build Grid pre-requisites
+
+To simplify the installation of **Grid pre-requisites**, you can use your favourite package manager, e.g.:
+
+### 1. [MacPorts][MacPorts]
+
+[MacPorts]: https://www.macports.org "MacPorts package manager"
+
+Install [MacPorts][MacPorts] if you haven't done so already, and then install packages with:
+
+    sudo port install <portname>
+
+These are the `portname`s for mandatory Grid libraries:
+
+* git
+* gmp
+* mpfr
+
+and these are the `portname`s for optional Grid libraries:
+
+* fftw-3
+* hdf5
+* lapack
+* doxygen
+* OpenBLAS
+
+***Please update this list with any packages I've missed! ... and double-check whether OpenBLAS is really for Grid***
+
+### 2. [Homebrew][Homebrew]
+
+[Homebrew]: https://brew.sh "Homebrew package manager"
+
+Install [Homebrew][Homebrew] if you haven't done so already, and then install packages with:
+
+    sudo brew install <packagename>
+
+The same packages are available as from MacPorts.
+
+### Install LIME ***optional***
+
+There isn't currently a port for [C-LIME][C-LIME], so download the source and then build it:
+
+[C-LIME]: https://usqcd-software.github.io/c-lime/ "C-language API for Lattice QCD Interchange Message Encapsulation / Large Internet Message Encapsulation"
+
+    ../configure --prefix=$GridPre/lime-1.3.2 CC=clang
+    make -j 4
+    make install
+
+## 5. Install, Configure and Build Grid
+
+### 5.1 Install Grid
+
+[Grid]: https://github.com/paboyle/Grid
+
+Start by cloning [Grid (from GitHub)][Grid] ***into the directory you specified in***  `$Grid`. Bear in mind that git will create the `Grid` subdirectory to place Grid in, so for example if `$Grid` is set to `~/src/Grid` then install Grid with:
+
+    cd ~/src
+
+followed by either:
+
+    git clone git@github.com:paboyle/Grid.git
+
+or
+
+    git clone https://github.com/paboyle/Grid.git
+
+depending on whether you are using https or ssh.
+
+### 5.2 Configure Grid
+
+The Xcode build system supports multiple configurations for each project, by default: `Debug` and `Release`, but more configurations can be defined. We will create separate Grid build directories for each configuration, using the Grid **Autoconf** build system to make each configuration. NB: it is **not** necessary to run `make install` on them once they are built (IDE features such as *jump to definition* will work better of you don't).
+
+Below are shown the `configure` script invocations for three recommended configurations. You are free to define more, less or different configurations, but as a minimum, be sure to build a `Debug` configuration.
+
+#### 1. `Debug`
+
+This is the build for every day developing and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double-precision. Xcode builds the `Debug` configuration with debug symbols for full debugging:
+
+    ../configure --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre/lime-1.3.2 --enable-simd=GEN --enable-precision=double CXX=clang++ --prefix=$GridPre/GridDebug --enable-comms=none --enable-doxygen-doc
+
+#### 2. `Release`
+
+Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single-precision (handy for validation):
+
+    ../configure --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre/lime-1.3.2 --enable-simd=GEN --enable-precision=single CXX=clang++ --prefix=$GridPre/GridRelease --enable-comms=none --enable-doxygen-doc
+
+#### 3. `MPIDebug`
+
+Debug configuration with MPI:
+
+    ../configure --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre/lime-1.3.2 --enable-simd=GEN --enable-precision=double CXX=clang++ --prefix=$GridPre/GridMPIDebug --enable-comms=mpi-auto MPICXX=$GridPre/openmpi-3.1.3/bin/mpicxx --enable-doxygen-doc
+
+### 5.3 Build Grid
+
+Each configuration must be built before they can be used. You can either:
+
+1.  Use automake and the Grid Makefile with `make -j 4` (NB: you **do not** need to run `make install` for these to work with Xcode)
+2. Build `Grid` and `Hadrons` under Xcode (see below)
+
+# Make a new application which links to Grid / Hadrons
+
+Making an Xcode project which links to Grid / Hadrons is straightforward:
+
+* Make a new application (in the usual way)
+* Configure your application to use Grid (via three project settings:)
+    1. `HEADER_SEARCH_PATHS`
+    2. `LIBRARY_SEARCH_PATHS`
+    3. `OTHER_LDFLAGS`
+* Make additional configurations, e.g. `MPIDebug` (NB Xcode will make `Debug` and `Release` by default)
+
+Detailed instructions follow, but instead of following the instructions in this section, you can clone `HelloGrid` from the [University of Edinburgh GitLab site][HelloGrid].
+
+[HelloGrid]: https://git.ecdf.ed.ac.uk/s1786208/HelloGrid
+
+## Make a new application
+
+To make a hello world application for Grid:
+
+* Start Xcode
+* Click 'Create a new project'
+* Click ‘macOS’, then in the ‘Application’ section choose ‘Command Line Tool’, then click ‘Next’
+* Choose options for your new project:
+  * Product Name: HelloGrid
+  * Team: None
+  * Organisation Name: sopa
+  * Organisation Identifier: uk.ac.ed.ph
+  * Language: C++
+  * ... then click ‘Next’
+* Choose a location for your project, e.g. `$HOME/src`. NB: The project and all it’s files will be created inside `$HOME/src/HelloGrid`. If you are using Git, you can put the new project under Git source control immediately, if you like. Now click ‘Create’.
+
+## Configure your new application to use Grid
+
+Click the project name (`HelloGrid`) in the project navigator pane on the left (command-1 if it's not visible), then click the project name (`HelloGrid`) under `PROJECT` in the second pane. Click the `Build Settings` tab on the right, then under that click `All` and `Combined`. You should see:
+
+![Project settings](GridXcFig1.png)
+
+We now need to make changes to two sections (these are listed in alphabetical order), bearing in mind that if you are not using MPI (or you gave your build directories different names) replace `build_mpidebug` and `build_mpirelease` with the directory names you used.
+
+### 1. Search Paths
+
+#### HEADER_SEARCH_PATHS
+
+Obtain a list of header locations required by Grid by running the following from your Grid build directory (choose an MPI configuration if you built one, e.g. `MPIDebug`):
+
+    ./grid-config --cxxflags
+
+Output should look similar to:
+
+    -I$GridPre/openmpi-3.1.3/include -I$GridPkg/include -I$GridPre/lime-1.3.2/include -I$GridPkg/include -I$GridPkg/include -I$GridPkg/include -O3 -g -std=c++11
+
+The header locations follow the `-I` switches. You can ignore the other switches, and you can ignore duplicate entries, which just mean that your package manager has installed multiple packages in the same location.
+
+*Note: `grid-config` will output absolute paths. Make sure to replace absolute paths with environment variables (such as `$GridPre`) in your settings, so that the project will work unmodified for other collaborators downloading the same project from git.*
+
+Set HEADER_SEARCH_PATHS to:
+
+    $Grid/build$(CONFIGURATION)/Grid
+    $Grid
+    $Grid/Grid
+
+followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.:
+
+    $GridPre/openmpi-3.1.3/include
+    $GridPkg/include
+    $GridPre/lime-1.3.2/include
+
+**Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.:
+
+    $Grid/build$(CONFIGURATION)/Grid $Grid $Grid/Grid $GridPre/openmpi-3.1.3/include $GridPkg/include $GridPre/lime-1.3.2/include
+
+#### LIBRARY_SEARCH_PATHS
+
+Obtain a list of library locations required by Grid by running the following from your Grid build directory (again, choose an MPI configuration if you built one, e.g. `MPIDebug`):
+
+    ./grid-config --ldflags
+
+Output should look similar to:
+
+    -L$GridPre/openmpi-3.1.3/lib -L$GridPkg/lib -L$GridPre/lime-1.3.2/lib -L$GridPkg/lib -L$GridPkg/lib -L$GridPkg/lib
+
+Paste the output ***with `$Grid/build$(CONFIGURATION)/Grid $Grid/build$(CONFIGURATION)/Hadrons ` prepended*** into `LIBRARY_SEARCH_PATHS`:
+
+    $Grid/build$(CONFIGURATION)/Grid $Grid/build$(CONFIGURATION)/Hadrons $GridPre/openmpi-3.1.3/lib $GridPkg/lib $GridPre/lime-1.3.2/lib
+
+### 2. Linking
+
+#### OTHER_LDFLAGS
+
+The easiest way to link to all required libraries is to obtain a list of all libraries required by Grid by running the following from your Grid build directory:
+
+    ./grid-config --libs
+
+and pasting the output ***with `-lGrid -lHadrons ` prepended*** (including the `-l` switches) directly into `OTHER_LDFLAGS`, e.g.:
+
+    -lGrid -lHadrons -lmpi -lhdf5_cpp -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lstdc++ -lm -lz -lhdf5
+
+## Make additional configurations
+
+On the project settings, `Info` tab, click the plus sign underneath configurations:
+
+![Add configurations](GridXcFig4.png)
+
+Choose `Duplicate "Debug" Configuration` (you can choose `Release` if you prefer) and give the new configuration a name, e.g. `MPIDebug`.
+
+## Edit your source code
+
+A hello world for grid is:
+
+```c++
+#include <Grid/Grid.h>
+using namespace Grid;
+
+int main(int argc, char * argv[]) {
+  Grid_init(&argc,&argv);
+  std::cout << GridLogMessage << "Hello Grid" << std::endl;
+  Grid_finalize();
+  return 0;
+}
+```
+
+## Create a `.gitignore` file for Xcode
+
+You can create an up-to-date .gitignore file to ignore all the Xcode temporary build files using [gitignore.io][GIO].
+
+[GIO]: https://www.gitignore.io/api/xcode
+
+NB: If you let Xcode add your project to git when you created it, you probably want to remove your personal scheme selection from git:
+
+    git rm --cached HelloGrid.xcodeproj/xcuserdata/$USER.xcuserdatad/xcschemes/xcschememanagement.plist
+
+## Run your program under the Xcode debugger
+
+First, specify command-line arguments. From the menu, select `Product`, then `Scheme`, then `Edit Scheme`. Select `Run` on the left, then select the `Arguments` tab on the right. Add the following to `Arguments passed on Launch`:
+
+    --grid 4.4.4.8
+
+If your program will be manipulating files, it's a good idea to specify the working directory on the `Options` tab under `Use Custom Working Directory` (by default, Xcode launches the program inside the Xcode build folder). 
+
+Then click `Close`.
+
+Let's set a breakpoint by clicking on:
+
+    Grid_finalize();
+
+then from the menu selecting `Debug`, then `Breakpoints`, then `Add Breakpoint at Current Line`.
+
+Now click on the `Play` button (the right pointing triangle just to the right of the maximise button) to run your program under the debugger. (You may see dialog boxes the first couple of times asking whether to allow MPI to receive network requests - say yes to these.)
+
+The debug output pane opens at the bottom of Xcode, with output on the right (ending with `Hello Grid`) and local variables on the left i.e.:
+
+![Running under the debugger](GridXcFig2.png)
+
+See the Xcode documentation to learn about the debugger. When you're done, press `ctl-cmd-Y` to let the program run to completion. 
+
+# Debugging multiple MPI processes under Xcode
+
+You could tell Xcode to use mpirun to launch multiple copies of a target executable, however if you do this the debugger will attach to mpirun - not your target process.
+
+Instead:
+
+1. Set a breakpoint just inside `main()` (otherwise your programs may complete before you attach to them all)
+
+2. From the `Debug` menu, select `Attach to Process by PID or Name ...`.  In the `PID or Process Name` field, enter the name of your target. Then click `Attach`.
+
+3. From a terminal session, locate and run your executable using `mpirun` (*the mangled name of the project build products will not be exactly the same as this example*):
+
+    `$GridPre/openmpi-3.1.3/bin/mpirun -np 2 ~/Library/Developer/Xcode/DerivedData/HelloGrid-fiyyuveptaqelbbvllomcgjyvghr/Build/Products/Debug/HelloGrid --grid 4.4.4.8 --mpi 1.1.1.2`
+
+    The Xcode debugger will attach to the first process.
+
+4. From the `Debug` menu in Xcode, select `Attach to Process`, and other running instances of your application will appear at the top of the list. Attach to as many instances as you wish to debug.
+
+5. Click on the first process (which should have stopped at the breakpoint) and restart it with ctl-cmd-y
+
+You are now debugging multiple MPI instances, and the Xcode debugger should look similar to this:   
+
+![Debugging multiple MPI instances under the Xcode debugger](GridXcFig3.png)
+
+# Build `Grid` and `Hadrons` libraries under Xcode
+
+If you want to build `Grid` and `Hadrons` libraries using Xcode, you will need to:
+
+1. Make new library targets for `Grid` and `Hadrons`
+
+2. Add Grid source folders to your project:
+    a. Right click project then `Add files to "project" ...`
+    b. Choose `$Grid/Grid` folder
+    c. Select `Create groups` (`folder references` doesn't work)
+    d. Select `Grid` (containing just the Grid sources, not the entire Git repository) as your target
+    e. Click `Add`
+
+3. Add Hadrons source folders to your project
+    a. As per `Grid`, but change the target to `Hadrons`
+    b. For each source file in `Archive`, remove them from the target (option-command-1, then untick source files)
+    
+4. Set the following values for each target in `Build Settings`
+
+    Group | Variable | Value
+    --- | --- | ---
+    `Deployment` | `DSTROOT` | `$Grid/build$(CONFIGURATION)` *(do this for the entire project)*
+    `Deployment` | `DEPLOYMENT_LOCATION` | `Yes`
+    `Deployment` | `INSTALL_PATH` | `$(PRODUCT_NAME)/`
+    `Deployment` | `SKIP_INSTALL` | `No`
+    `Linking` | `OTHER_LDFLAGS` | remove `-lGrid -lHadrons` from the list
+    
+    This ensures that the libraries are copied back into the build folders when they are made (removing the need to run `make -j 4`)
+
+5. For `Grid`, in `Build Settings` in the `Build Options` group, set:
+
+    Variable | Configuration | Value
+    --- | --- | ---
+    `EXCLUDED_SOURCE_FILE_NAMES` | Non-MPI configurations (`Debug` and `Release`) | `$(Grid)/Grid/communicator/Communicator_mpi3.cc $(Grid)/Grid/communicator/SharedMemoryMPI.cc` 
+    `EXCLUDED_SOURCE_FILE_NAMES` | MPI configurations (`MPIDebug`) | `$(Grid)/Grid/communicator/Communicator_none.cc $(Grid)/Grid/communicator/SharedMemoryNone.cc`
+
+You should now be able to build and debug any configuration.
diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc
index 15602e93..d1ec1309 100644
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -4,11 +4,12 @@
 
     Source file: ./tests/Test_serialisation.cc
 
-    Copyright (C) 2015-2016
+    Copyright (C) 2015-2019
 
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
 Author: Antonin Portelli <antonin.portelli@me.com>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Michael Marshall <michael.marshall@ed.ac.uk>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -28,6 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
     *************************************************************************************/
     /*  END LEGAL */
 #include <Grid/Grid.h>
+#include <typeinfo>
 
 using namespace Grid;
 using namespace Grid::QCD;
@@ -80,26 +82,159 @@ double   d   = 2*M_PI;
 bool     b   = false;
 
 template <typename W, typename R, typename O>
-void ioTest(const std::string &filename, const O &object, const std::string &name)
+void ioTest(const std::string &filename, const O &object, const std::string &name,
+            const char * tag = "testobject", unsigned short Precision = 0 )
 {
+  std::cout << "IO test: " << name << " -> " << filename << " ...";
   // writer needs to be destroyed so that writing physically happens
   {
     W writer(filename);
-
-    write(writer, "testobject", object);
+    if( Precision )
+      writer.setPrecision(Precision);
+    write(writer, tag , object);
   }
 
+  std::cout << " done. reading ...";
   R    reader(filename);
-  O    buf;
-  bool good;
+  std::unique_ptr<O> buf( new O ); // In case object too big for stack
 
-  read(reader, "testobject", buf);
-  good = (object == buf);
-  std::cout << name << " IO test: " << (good ? "success" : "failure");
-  std::cout << std::endl;
-  if (!good) exit(EXIT_FAILURE);
+  read(reader, tag, *buf);
+  bool good = Serializable::CompareMember(object, *buf);
+  if (!good) {
+    std::cout << " failure!" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  std::cout << " done." << std::endl;
 }
 
+// The only way I could get these iterators to work is to put the begin() and end() functions in the Eigen namespace
+// So if Eigen ever defines these, we'll have a conflict and have to change this
+namespace Eigen {
+  template <typename ET>
+  inline typename std::enable_if<EigenIO::is_tensor<ET>::value, typename EigenIO::Traits<ET>::scalar_type *>::type
+  begin( ET & et ) { return reinterpret_cast<typename Grid::EigenIO::Traits<ET>::scalar_type *>(et.data()); }
+  template <typename ET>
+  inline typename std::enable_if<EigenIO::is_tensor<ET>::value, typename EigenIO::Traits<ET>::scalar_type *>::type
+  end( ET & et ) { return begin(et) + et.size() * EigenIO::Traits<ET>::count; }
+}
+
+// Perform I/O tests on a range of tensor types
+// Test coverage: scalars, complex and GridVectors in single, double and default precision
+class TensorIO : public Serializable {
+  using TestScalar = ComplexD;
+  using SR3 = Eigen::Sizes<9,4,2>;
+  using SR5 = Eigen::Sizes<5,4,3,2,1>;
+  using ESO = Eigen::StorageOptions;
+  using TensorRank3  = Eigen::Tensor<ComplexF, 3, ESO::RowMajor>;
+  using TensorR5     = Eigen::TensorFixedSize<Real, SR5>;
+  using TensorR5Alt  = Eigen::TensorFixedSize<Real, SR5, ESO::RowMajor>;
+  using Tensor942    = Eigen::TensorFixedSize<TestScalar, SR3, ESO::RowMajor>;
+  using aTensor942   = std::vector<Tensor942>;
+  using Perambulator = Eigen::Tensor<SpinColourVector, 6, ESO::RowMajor>;
+  using LSCTensor    = Eigen::TensorFixedSize<SpinColourMatrix, Eigen::Sizes<6,5>>;
+  
+  static const Real       FlagR;
+  static const Complex    Flag;
+  static const ComplexF   FlagF;
+  static const TestScalar FlagTS;
+  static const char * const pszFilePrefix;
+
+  void Init(unsigned short Precision)
+  {
+    for( auto &s : Perambulator1 ) s = Flag;
+    for( auto &s : Perambulator2 ) s = Flag;
+    for( auto &s : tensorR5 )      s = FlagR;
+    for( auto &s : tensorRank3 )   s = FlagF;
+    for( auto &s : tensor_9_4_2 )  s = FlagTS;
+    for( auto &t : atensor_9_4_2 )
+      for( auto &s : t )           s = FlagTS;
+    for( auto &s : MyLSCTensor )   s = Flag;
+  }
+  
+  // Perform an I/O test for a single Eigen tensor (of any type)
+  template <typename W, typename R, typename T, typename... IndexTypes>
+  static void TestOne(const char * MyTypeName, unsigned short Precision, std::string &filename,
+                      const char * pszExtension, unsigned int &TestNum,
+                      typename EigenIO::Traits<T>::scalar_type Flag, IndexTypes... otherDims)
+  {
+    using Traits = EigenIO::Traits<T>;
+    using scalar_type = typename Traits::scalar_type;
+    std::unique_ptr<T> pTensor{new T(otherDims...)};
+    for( auto &s : * pTensor ) s = Flag;
+    filename = pszFilePrefix + std::to_string(++TestNum) + "_" + MyTypeName + pszExtension;
+    ioTest<W, R, T>(filename, * pTensor, MyTypeName, MyTypeName);
+  }
+  
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(TensorIO
+                                  , SpinColourVector,         spinColourVector
+                                  , SpinColourMatrix,         spinColourMatrix
+                                  , std::vector<std::string>, DistilParameterNames
+                                  , std::vector<int>,         DistilParameterValues
+                                  , Perambulator,             Perambulator1
+                                  , Perambulator,             Perambulator2
+                                  , TensorR5,                 tensorR5
+                                  , TensorRank3,              tensorRank3
+                                  , Tensor942,                tensor_9_4_2
+                                  , aTensor942,               atensor_9_4_2
+                                  , LSCTensor,                MyLSCTensor
+                                  );
+  TensorIO()
+  : DistilParameterNames {"do", "androids", "dream", "of", "electric", "sheep?"}
+  , DistilParameterValues{2,3,1,4,5,1}
+  , Perambulator1(2,3,1,4,5,1)
+  , Perambulator2(7,1,6,1,5,1)
+  , tensorRank3(7,3,2)
+  , atensor_9_4_2(3) {}
+  
+#define TEST_PARAMS( T ) #T, Precision, filename, pszExtension, TestNum
+  
+  // Perform a series of I/O tests for Eigen tensors, including a serialisable object
+  template <typename WTR_, typename RDR_>
+  static void Test(const char * pszExtension, unsigned short Precision = 0)
+  {
+    // Perform a series of tests on progressively more complex tensors
+    unsigned int TestNum = 0;
+    std::string filename;
+    // Rank 1 tensor containing a single integer
+    using TensorSingle = Eigen::TensorFixedSize<Integer, Eigen::Sizes<1>>;
+    TestOne<WTR_, RDR_, TensorSingle>( TEST_PARAMS( TensorSingle ), 7 ); // lucky!
+    // Rather convoluted way of defining four complex numbers
+    using TensorSimple = Eigen::Tensor<iMatrix<TestScalar,2>, 6>;
+    using I = typename TensorSimple::Index; // NB: Never specified, so same for all my test tensors
+    // Try progressively more complicated tensors
+    TestOne<WTR_, RDR_, TensorSimple, I,I,I,I,I,I>( TEST_PARAMS( TensorSimple ), FlagTS, 1,1,1,1,1,1 );
+    TestOne<WTR_, RDR_, TensorRank3, I, I, I>( TEST_PARAMS( TensorRank3 ), FlagF, 6, 3, 2 );
+    TestOne<WTR_, RDR_, Tensor942>(TEST_PARAMS( Tensor942 ), FlagTS);
+    TestOne<WTR_, RDR_, LSCTensor>(TEST_PARAMS( LSCTensor ), Flag );
+    TestOne<WTR_, RDR_, TensorR5>(TEST_PARAMS( TensorR5 ), FlagR);
+    // Now test a serialisable object containing a number of tensors
+    {
+      static const char MyTypeName[] = "TensorIO";
+      filename = pszFilePrefix + std::to_string(++TestNum) + "_" + MyTypeName + pszExtension;
+      std::unique_ptr<TensorIO> pObj{new TensorIO()};
+      pObj->Init(Precision);
+      ioTest<WTR_, RDR_, TensorIO>(filename, * pObj, MyTypeName, MyTypeName, Precision);
+    }
+    // Stress test. Too large for the XML or text readers and writers!
+#ifdef STRESS_TEST
+    const std::type_info &tw = typeid( WTR_ );
+    if( tw == typeid( Hdf5Writer ) || tw == typeid( BinaryWriter ) ) {
+      using LCMTensor=Eigen::TensorFixedSize<iMatrix<iVector<iMatrix<iVector<LorentzColourMatrix,5>,2>,7>,3>,
+      Eigen::Sizes<2,4,11,10,9>, Eigen::StorageOptions::RowMajor>;
+      std::cout << "sizeof( LCMTensor ) = " << sizeof( LCMTensor ) / 1024 / 1024 << " MB" << std::endl;
+      TestOne<WTR_, RDR_, LCMTensor>(TEST_PARAMS( LCMTensor ), Flag);
+    }
+#endif
+  }
+};
+
+const Real                 TensorIO::FlagR {1};
+const Complex              TensorIO::Flag  {1,-1};
+const ComplexF             TensorIO::FlagF {1,-1};
+const TensorIO::TestScalar TensorIO::FlagTS{1,-1};
+const char * const         TensorIO::pszFilePrefix = "tensor_";
+
 template <typename T>
 void tensorConvTestFn(GridSerialRNG &rng, const std::string label)
 {
@@ -121,12 +256,12 @@ void tensorConvTestFn(GridSerialRNG &rng, const std::string label)
 int main(int argc,char **argv)
 {
   Grid_init(&argc,&argv);
-  
+  std::cout << std::boolalpha << "==== basic IO" << std::endl; // display true / false for boolean
+
   GridSerialRNG    rng;
 
   rng.SeedFixedIntegers(std::vector<int>({42,10,81,9}));
-  
-  std::cout << "==== basic IO" << std::endl;
+
   XmlWriter WR("bother.xml");
 
   // test basic type writing
@@ -146,7 +281,6 @@ int main(int argc,char **argv)
   // test serializable class writing
   myclass              obj(1234); // non-trivial constructor
   std::vector<myclass> vec;
-  std::pair<myenum, myenum> pair;
 
   std::cout << "-- serialisable class writing to 'bother.xml'..." << std::endl;
   write(WR,"obj",obj);
@@ -154,15 +288,15 @@ int main(int argc,char **argv)
   vec.push_back(obj);
   vec.push_back(myclass(5678));
   vec.push_back(myclass(3838));
-  pair = std::make_pair(myenum::red, myenum::blue);
 
   write(WR, "objvec", vec);
   std::cout << "-- serialisable class writing to std::cout:" << std::endl;
   std::cout << obj << std::endl;
   std::cout << "-- serialisable class comparison:" << std::endl;
-  std::cout << "vec[0] == obj: " << ((vec[0] == obj) ? "true" : "false") << std::endl;
-  std::cout << "vec[1] == obj: " << ((vec[1] == obj) ? "true" : "false") << std::endl;
+  std::cout << "vec[0] == obj: " << (vec[0] == obj) << std::endl;
+  std::cout << "vec[1] == obj: " << (vec[1] == obj) << std::endl;
   std::cout << "-- pair writing to std::cout:" << std::endl;
+  std::pair<myenum, myenum> pair = std::make_pair(myenum::red, myenum::blue);
   std::cout << pair << std::endl;
 
   // read tests
@@ -184,7 +318,15 @@ int main(int argc,char **argv)
 #ifdef HAVE_HDF5
   ioTest<Hdf5Writer, Hdf5Reader>("iotest.h5", obj, "HDF5   (object)           ");
   ioTest<Hdf5Writer, Hdf5Reader>("iotest.h5", vec, "HDF5   (vector of objects)");
+  std::cout << "\n==== detailed Hdf5 tensor tests (Grid::EigenIO)" << std::endl;
+  TensorIO::Test<Hdf5Writer, Hdf5Reader>(".h5");
 #endif
+  std::cout << "\n==== detailed binary tensor tests (Grid::EigenIO)" << std::endl;
+  TensorIO::Test<BinaryWriter, BinaryReader>(".bin");
+  std::cout << "\n==== detailed xml tensor tests (Grid::EigenIO)" << std::endl;
+  TensorIO::Test<XmlWriter, XmlReader>(".xml", 6);
+  std::cout << "\n==== detailed text tensor tests (Grid::EigenIO)" << std::endl;
+  TensorIO::Test<TextWriter, TextReader>(".dat", 5);
 
   std::cout << "\n==== vector flattening/reconstruction" << std::endl;
   typedef std::vector<std::vector<std::vector<double>>> vec3d;
@@ -227,4 +369,6 @@ int main(int argc,char **argv)
   tensorConvTest(rng, ColourVector);
   tensorConvTest(rng, SpinMatrix);
   tensorConvTest(rng, SpinVector);
+
+  Grid_finalize();
 }
diff --git a/tests/smearing/Test_smearing.cc b/tests/smearing/Test_smearing.cc
new file mode 100644
index 00000000..996671f0
--- /dev/null
+++ b/tests/smearing/Test_smearing.cc
@@ -0,0 +1,73 @@
+/*
+ *
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./tests/Test_wilson_cg_prec.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+
+  std::vector<int> latt_size   = GridDefaultLatt();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
+
+  std::vector<int> seeds({1,2,3,4});
+  GridParallelRNG          pRNG(&Grid);  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeFermion src(&Grid); random(pRNG,src);
+  RealD nrm = norm2(src);
+  LatticeFermion result(&Grid); result=zero;
+  LatticeGaugeField Umu(&Grid);
+  //  SU3::HotConfiguration(pRNG,Umu);
+  SU3::ColdConfiguration(Umu);
+  std::vector<LatticeColourMatrix> U(4,&Grid);
+
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
+  }
+
+  std::vector<int> site({4,4,0,0});
+  src=zero;
+  SpinColourVector scv;
+  scv=zero;
+  scv()(0)(0) = 1.0;
+  pokeSite(scv,src,site);
+
+  CovariantSmearing<PeriodicGimplR>::GaussianSmear(U, src, 2.0, 50, Tdir);
+
+  std::cout << src <<std::endl;
+  
+}
+
diff --git a/tests/solver/Test_wilsonclover_mg_lime.cc b/tests/solver/Test_wilsonclover_mg_lime.cc
new file mode 100644
index 00000000..687ec83f
--- /dev/null
+++ b/tests/solver/Test_wilsonclover_mg_lime.cc
@@ -0,0 +1,194 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/solver/Test_wilsonclover_mg_mp.cc
+
+    Copyright (C) 2015-2018
+
+    Author: Daniel Richtmann <daniel.richtmann@ur.de>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+/* */
+
+
+#include <Grid/Grid.h>
+#include <Test_multigrid_common.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main(int argc, char **argv) {
+
+
+  Grid_init(&argc, &argv);
+
+  // clang-format off
+  GridCartesian         *FGrid_d   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridCartesian         *FGrid_f   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian *FrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_d);
+  GridRedBlackCartesian *FrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(FGrid_f);
+  // clang-format on
+
+  std::vector<int> fSeeds({1, 2, 3, 4});
+  GridParallelRNG  fPRNG(FGrid_d);
+  fPRNG.SeedFixedIntegers(fSeeds);
+
+  // clang-format off
+  LatticeFermionD       src_d(FGrid_d); gaussian(fPRNG, src_d);
+  LatticeFermionD resultMGD_d(FGrid_d); resultMGD_d = zero;
+  LatticeFermionD resultMGF_d(FGrid_d); resultMGF_d = zero;
+  LatticeGaugeFieldD    Umu_d(FGrid_d);
+
+#if 0
+  {
+    FieldMetaData header;
+    std::string file("./qcdsf.769.00399.lime");
+    std::cout <<GridLogMessage<<"**************************************"<<std::endl;
+    std::cout <<GridLogMessage<<"** Reading back ILDG conf    *********"<<std::endl;
+    std::cout <<GridLogMessage<<"**************************************"<<std::endl;
+    IldgReader _IldgReader;
+    _IldgReader.open(file);
+    _IldgReader.readConfiguration(Umu_d,header);
+    _IldgReader.close();
+
+  }
+#else
+{
+  FieldMetaData header;
+  std::string file("./ckpoint_lat.IEEE64BIG.1100");
+  NerscIO::readConfiguration(Umu_d,header,file);
+}
+#endif
+  // SU3::HotConfiguration(fPRNG, Umu_d);
+
+  LatticeGaugeFieldF    Umu_f(FGrid_f); precisionChange(Umu_f, Umu_d);
+  // clang-format on
+
+  RealD mass  = -0.25;
+  RealD csw_r = 1.0;
+  RealD csw_t = 1.0;
+
+  MultiGridParams mgParams;
+  std::string     inputXml{"./mg_params.xml"};
+
+  if(GridCmdOptionExists(argv, argv + argc, "--inputxml")) {
+    inputXml = GridCmdOptionPayload(argv, argv + argc, "--inputxml");
+    assert(inputXml.length() != 0);
+  }
+
+  {
+    XmlWriter writer("mg_params_template.xml");
+    write(writer, "Params", mgParams);
+    std::cout << GridLogMessage << "Written mg_params_template.xml" << std::endl;
+
+    XmlReader reader(inputXml);
+    read(reader, "Params", mgParams);
+    std::cout << GridLogMessage << "Read in " << inputXml << std::endl;
+  }
+
+  checkParameterValidity(mgParams);
+  std::cout << mgParams << std::endl;
+
+  LevelInfo levelInfo_d(FGrid_d, mgParams);
+  LevelInfo levelInfo_f(FGrid_f, mgParams);
+
+  // Note: We do chiral doubling, so actually only nbasis/2 full basis vectors are used
+  const int nbasis = 40;
+
+  WilsonCloverFermionD Dwc_d(Umu_d, *FGrid_d, *FrbGrid_d, mass, csw_r, csw_t);
+  WilsonCloverFermionF Dwc_f(Umu_f, *FGrid_f, *FrbGrid_f, mass, csw_r, csw_t);
+
+  MdagMLinearOperator<WilsonCloverFermionD, LatticeFermionD> MdagMOpDwc_d(Dwc_d);
+  MdagMLinearOperator<WilsonCloverFermionF, LatticeFermionF> MdagMOpDwc_f(Dwc_f);
+
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+  std::cout << GridLogMessage << "Testing single-precision Multigrid for Wilson Clover" << std::endl;
+  std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+  auto MGPreconDwc_f = createMGInstance<vSpinColourVectorF, vTComplexF, nbasis, WilsonCloverFermionF>(mgParams, levelInfo_f, Dwc_f, Dwc_f);
+
+  MGPreconDwc_f->setup();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+    MGPreconDwc_f->runChecks(1e-6);
+  }
+
+  MixedPrecisionFlexibleGeneralisedMinimalResidual<LatticeFermionD, LatticeFermionF> MPFGMRESPREC(
+    1.0e-12, 50000, FGrid_f, *MGPreconDwc_f, 100, false);
+
+  std::cout << std::endl << "Starting with a new solver" << std::endl;
+  MPFGMRESPREC(MdagMOpDwc_d, src_d, resultMGF_d);
+
+  MGPreconDwc_f->reportTimings();
+
+  if(GridCmdOptionExists(argv, argv + argc, "--docomparison")) {
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Testing double-precision Multigrid for Wilson Clover" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    auto MGPreconDwc_d = createMGInstance<vSpinColourVectorD, vTComplexD, nbasis, WilsonCloverFermionD>(mgParams, levelInfo_d, Dwc_d, Dwc_d);
+
+    MGPreconDwc_d->setup();
+
+    if(GridCmdOptionExists(argv, argv + argc, "--runchecks")) {
+      MGPreconDwc_d->runChecks(1e-13);
+    }
+
+    FlexibleGeneralisedMinimalResidual<LatticeFermionD> FGMRESPREC(1.0e-12, 50000, *MGPreconDwc_d, 100, false);
+
+    std::cout << std::endl << "Starting with a new solver" << std::endl;
+    FGMRESPREC(MdagMOpDwc_d, src_d, resultMGD_d);
+
+    MGPreconDwc_d->reportTimings();
+
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+    std::cout << GridLogMessage << "Comparing single-precision Multigrid with double-precision one for Wilson Clover" << std::endl;
+    std::cout << GridLogMessage << "**************************************************" << std::endl;
+
+    LatticeFermionD diffFullSolver(FGrid_d);
+
+    RealD deviationFullSolver = axpy_norm(diffFullSolver, -1.0, resultMGF_d, resultMGD_d);
+
+    // clang-format off
+    LatticeFermionF src_f(FGrid_f);    precisionChange(src_f, src_d);
+    LatticeFermionF resMGF_f(FGrid_f); resMGF_f = zero;
+    LatticeFermionD resMGD_d(FGrid_d); resMGD_d = zero;
+    // clang-format on
+
+    (*MGPreconDwc_f)(src_f, resMGF_f);
+    (*MGPreconDwc_d)(src_d, resMGD_d);
+
+    LatticeFermionD diffOnlyMG(FGrid_d);
+    LatticeFermionD resMGF_d(FGrid_d);
+    precisionChange(resMGF_d, resMGF_f);
+
+    RealD deviationOnlyPrec = axpy_norm(diffOnlyMG, -1.0, resMGF_d, resMGD_d);
+
+    // clang-format off
+    std::cout << GridLogMessage << "Absolute difference between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver                      << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between FGMRES preconditioned by double and single precicision MG: " << deviationFullSolver / norm2(resultMGD_d) << std::endl;
+    std::cout << GridLogMessage << "Absolute difference between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec                        << std::endl;
+    std::cout << GridLogMessage << "Relative deviation  between one iteration of MG Prec in double and single precision:   " << deviationOnlyPrec / norm2(resMGD_d)      << std::endl;
+    // clang-format on
+  }
+
+  Grid_finalize();
+}