diff --git a/Grid/algorithms/iterative/LocalCoherenceLanczos.h b/Grid/algorithms/iterative/LocalCoherenceLanczos.h
index 0a2fe55c..344a785a 100644
--- a/Grid/algorithms/iterative/LocalCoherenceLanczos.h
+++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h
@@ -146,14 +146,21 @@ public:
   LinearOperatorBase<FineField> &_Linop;
   RealD                             _coarse_relax_tol;
   std::vector<FineField>        &_subspace;
+
+  int _largestEvalIdxForReport; //The convergence of the LCL is based on the evals of the coarse grid operator, not those of the underlying fine grid operator
+                                //As a result we do not know what the eval range of the fine operator is until the very end, making tuning the Cheby bounds very difficult
+                                //To work around this issue, every restart we separately reconstruct the fine operator eval for the lowest and highest evec and print these
+                                //out alongside the evals of the coarse operator. To do so we need to know the index of the largest eval (i.e. Nstop-1)
+                                //NOTE: If largestEvalIdxForReport=-1 (default) then this is not performed
   
   ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly,
 					   OperatorFunction<FineField>   &smoother,
 					   LinearOperatorBase<FineField> &Linop,
 					   std::vector<FineField>        &subspace,
-					   RealD coarse_relax_tol=5.0e3) 
+					   RealD coarse_relax_tol=5.0e3,
+					   int largestEvalIdxForReport=-1) 
     : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
-      _coarse_relax_tol(coarse_relax_tol)  
+      _coarse_relax_tol(coarse_relax_tol), _largestEvalIdxForReport(largestEvalIdxForReport)
   {    };
 
   //evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
@@ -179,6 +186,12 @@ public:
 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
 	     <<std::endl;
 
+    if(_largestEvalIdxForReport != -1 && (j==0 || j==_largestEvalIdxForReport)){
+      std::cout<<GridLogIRL << "Estimating true eval of fine grid operator for eval idx " << j << std::endl;
+      RealD tmp_eval;
+      ReconstructEval(j,eresid,B,tmp_eval,1.0); //don't use evalMaxApprox of coarse operator! (cf below)
+    }
+    
     int conv=0;
     if( (vv<eresid*eresid) ) conv = 1;
     return conv;
@@ -409,7 +422,7 @@ public:
     //////////////////////////////////////////////////////////////////////////////////////////////////
 
     Chebyshev<FineField>                                           ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); 
+    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax,Nstop-1); 
 
     evals_coarse.resize(Nm);
     evec_coarse.resize(Nm,_CoarseGrid);
diff --git a/Grid/qcd/observables/topological_charge.h b/Grid/qcd/observables/topological_charge.h
index 4f116496..7c09a180 100644
--- a/Grid/qcd/observables/topological_charge.h
+++ b/Grid/qcd/observables/topological_charge.h
@@ -99,7 +99,7 @@ public:
 	// using wilson flow by default here
 	WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
 	WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
-	Real T0   = WF.energyDensityPlaquette(Usmear);
+	Real T0   = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
 	std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
 		  << "T0                : [ " << traj << " ] "<< T0 << std::endl;
       }
diff --git a/Grid/qcd/smearing/WilsonFlow.h b/Grid/qcd/smearing/WilsonFlow.h
index 19fd94e2..0d1ee5d2 100644
--- a/Grid/qcd/smearing/WilsonFlow.h
+++ b/Grid/qcd/smearing/WilsonFlow.h
@@ -7,6 +7,7 @@ Source file: ./lib/qcd/modules/plaquette.h
 Copyright (C) 2017
 
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
+Author: Christopher Kelly <ckelly@bnl.gov>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -33,28 +34,44 @@ NAMESPACE_BEGIN(Grid);
 
 template <class Gimpl>
 class WilsonFlow: public Smear<Gimpl>{
+public:
+  //Store generic measurements to take during smearing process using std::function
+  typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType;  //int: step,  RealD: flow time,  GaugeField : the gauge field
+  
+private:
   unsigned int Nstep;
-  unsigned int measure_interval;
-  mutable RealD epsilon, taus;
-
+  RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
+ 
+  std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
 
   mutable WilsonGaugeAction<Gimpl> SG;
 
-  void evolve_step(typename Gimpl::GaugeField&) const;
-  void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
-  RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
+  //Evolve the gauge field by 1 step and update tau
+  void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
+  //Evolve the gauge field by 1 step and update tau and the current time step eps
+  void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
 
 public:
   INHERIT_GIMPL_TYPES(Gimpl)
 
+  void resetActions(){ functions.clear(); }
+
+  void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
+
+  //Set the class to perform the default measurements: 
+  //the plaquette energy density every step
+  //the plaquette topological charge every 'topq_meas_interval' steps
+  //and output to stdout
+  void setDefaultMeasurements(int topq_meas_interval = 1);
+
   explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
   Nstep(Nstep),
     epsilon(epsilon),
-    measure_interval(interval),
     SG(WilsonGaugeAction<Gimpl>(3.0)) {
     // WilsonGaugeAction with beta 3.0
     assert(epsilon > 0.0);
     LogMessage();
+    setDefaultMeasurements(interval);
   }
 
   void LogMessage() {
@@ -73,9 +90,29 @@ public:
     // undefined for WilsonFlow
   }
 
-  void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
-  RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
-  RealD energyDensityPlaquette(const GaugeField& U) const;
+  void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
+
+  //Compute t^2 <E(t)> for time t from the plaquette
+  static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
+
+  //Compute t^2 <E(t)> for time t from the 1x1 cloverleaf form
+  //t is the Wilson flow time
+  static RealD energyDensityCloverleaf(const RealD t, const GaugeField& U);
+  
+  //Evolve the gauge field by Nstep steps of epsilon and return the energy density computed every interval steps
+  //The smeared field is output as V
+  std::vector<RealD> flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval = 1);
+
+  //Version that does not return the smeared field
+  std::vector<RealD> flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval = 1);
+
+
+  //Evolve the gauge field by Nstep steps of epsilon and return the Cloverleaf energy density computed every interval steps
+  //The smeared field is output as V
+  std::vector<RealD> flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval = 1);
+
+  //Version that does not return the smeared field
+  std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
 };
 
 
@@ -83,7 +120,7 @@ public:
 // Implementations
 ////////////////////////////////////////////////////////////////////////////////
 template <class Gimpl>
-void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
+void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
   GaugeField Z(U.Grid());
   GaugeField tmp(U.Grid());
   SG.deriv(U, Z);
@@ -99,12 +136,13 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
   SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
   Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
   Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
+  tau += epsilon;
 }
 
 template <class Gimpl>
-void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
-  if (maxTau - taus < epsilon){
-    epsilon = maxTau-taus;
+void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
+  if (maxTau - tau < eps){
+    eps = maxTau-tau;
   }
   //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
   GaugeField Z(U.Grid());
@@ -114,95 +152,151 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
   SG.deriv(U, Z);
   Zprime = -Z;
   Z *= 0.25;                                  // Z0 = 1/4 * F(U)
-  Gimpl::update_field(Z, U, -2.0*epsilon);    // U = W1 = exp(ep*Z0)*W0
+  Gimpl::update_field(Z, U, -2.0*eps);    // U = W1 = exp(ep*Z0)*W0
 
   Z *= -17.0/8.0;
   SG.deriv(U, tmp); Z += tmp;                 // -17/32*Z0 +Z1
   Zprime += 2.0*tmp;
   Z *= 8.0/9.0;                               // Z = -17/36*Z0 +8/9*Z1
-  Gimpl::update_field(Z, U, -2.0*epsilon);    // U_= W2 = exp(ep*Z)*W1
+  Gimpl::update_field(Z, U, -2.0*eps);    // U_= W2 = exp(ep*Z)*W1
     
 
   Z *= -4.0/3.0;
   SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
   Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
-  Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
+  Gimpl::update_field(Z, U, -2.0*eps);    // V(t+e) = exp(ep*Z)*W2
 
   // Ramos 
-  Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
+  Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
   // Compute distance as norm^2 of the difference
   GaugeField diffU = U - Uprime;
   RealD diff = norm2(diffU);
   // adjust integration step
     
-  taus += epsilon;
+  tau += eps;
   //std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
     
-  epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
+  eps = eps*0.95*std::pow(1e-4/diff,1./3.);
   //std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
 
 }
 
+
 template <class Gimpl>
-RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
-  RealD td = tau(step);
-  return 2.0 * td * td * SG.S(U)/U.Grid()->gSites();
+RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
+  static WilsonGaugeAction<Gimpl> SG(3.0);
+  return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
+}
+
+//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
+template <class Gimpl>
+RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
+  typedef typename Gimpl::GaugeLinkField GaugeMat;
+  typedef typename Gimpl::GaugeField GaugeLorentz;
+
+  assert(Nd == 4);
+  //E = 1/2 tr( F_munu F_munu )
+  //However as  F_numu = -F_munu, only need to sum the trace of the squares of the following 6 field strengths:
+  //F_01 F_02 F_03   F_12 F_13  F_23
+  GaugeMat F(U.Grid());
+  LatticeComplexD R(U.Grid());
+  R = Zero();
+  
+  for(int mu=0;mu<3;mu++){
+    for(int nu=mu+1;nu<4;nu++){
+      WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
+      R = R + trace(F*F);
+    }
+  }
+  ComplexD out = sum(R);
+  out = t*t*out / RealD(U.Grid()->gSites());
+  return -real(out); //minus sign necessary for +ve energy
+}
+
+
+template <class Gimpl>
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
+  std::vector<RealD> out;
+  resetActions();
+  addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){ 
+      std::cout << GridLogMessage << "[WilsonFlow] Computing plaquette energy density for step " << step << std::endl;
+      out.push_back( energyDensityPlaquette(t,U) );
+    });      
+  smear(V,U);
+  return out;
 }
 
 template <class Gimpl>
-RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
-  return 2.0 * taus * taus * SG.S(U)/U.Grid()->gSites();
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
+  GaugeField V(U);
+  return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
 }
 
+template <class Gimpl>
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
+  std::vector<RealD> out;
+  resetActions();
+  addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){ 
+      std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
+      out.push_back( energyDensityCloverleaf(t,U) );
+    });      
+  smear(V,U);
+  return out;
+}
+
+template <class Gimpl>
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
+  GaugeField V(U);
+  return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
+}
+
+
 
 //#define WF_TIMING 
-
-
-
 template <class Gimpl>
-void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
+void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
   out = in;
-  for (unsigned int step = 1; step <= Nstep; step++) {
+  RealD taus = 0.;
+  for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
     auto start = std::chrono::high_resolution_clock::now();
-    evolve_step(out);
+    evolve_step(out, taus);
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> diff = end - start;
 #ifdef WF_TIMING
     std::cout << "Time to evolve " << diff.count() << " s\n";
 #endif
-    std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
-		  << step << "  " << tau(step) << "  " 
-	      << energyDensityPlaquette(step,out) << std::endl;
-    if( step % measure_interval == 0){
-      std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "
-		<< step << "  " 
-		<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
-    }
+    //Perform measurements
+    for(auto const &meas : functions)
+      if( step % meas.first == 0 ) meas.second(step,taus,out);
   }
 }
 
 template <class Gimpl>
-void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
+void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
   out = in;
-  taus = epsilon;
+  RealD taus = 0.;
+  RealD eps = epsilon;
   unsigned int step = 0;
   do{
     step++;
     //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
-    evolve_step_adaptive(out, maxTau);
-    std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
-		  << step << "  " << taus << "  "
-	      << energyDensityPlaquette(out) << std::endl;
-    if( step % measure_interval == 0){
-      std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "
-		<< step << "  " 
-		<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
-    }
+    evolve_step_adaptive(out, taus, eps, maxTau);
+    //Perform measurements
+    for(auto const &meas : functions)
+      if( step % meas.first == 0 ) meas.second(step,taus,out);
   } while (taus < maxTau);
-
-
-
 }
 
+template <class Gimpl>
+void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
+  addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
+      std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "  << step << "  " << t << "  " << energyDensityPlaquette(t,U) << std::endl;
+    });
+  addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
+      std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "  << step << "  " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
+    });
+}
+
+
 NAMESPACE_END(Grid);
 
diff --git a/Grid/tensors/Tensor_extract_merge.h b/Grid/tensors/Tensor_extract_merge.h
index ea619d0f..ab14f81f 100644
--- a/Grid/tensors/Tensor_extract_merge.h
+++ b/Grid/tensors/Tensor_extract_merge.h
@@ -208,5 +208,46 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////////
+//Copy a single lane of a SIMD tensor type from one object to another
+//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
+///////////////////////////////////////////////////////////////////////////////////
+template<class vobjOut, class vobjIn>
+accelerator_inline 
+void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
+{
+  static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
+
+  typedef typename vobjOut::vector_type ovector_type;  
+  typedef typename vobjIn::vector_type ivector_type;  
+  constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
+  constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
+  static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
+
+  typedef typename vobjOut::scalar_type oscalar_type;  
+  typedef typename vobjIn::scalar_type iscalar_type;  
+  typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
+  typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
+
+  typedef oextract_type * opointer;
+  typedef iextract_type * ipointer;
+
+  constexpr int oNsimd=ovector_type::Nsimd();
+  constexpr int iNsimd=ivector_type::Nsimd();
+
+  iscalar_type itmp;
+  oscalar_type otmp;
+
+  opointer __restrict__  op = (opointer)&vecOut;
+  ipointer __restrict__  ip = (ipointer)&vecIn;
+  for(int w=0;w<owords;w++){
+    memcpy( (char*)&itmp, (char*)(ip + lane_in + iNsimd*w), sizeof(iscalar_type) );
+    otmp = itmp; //potential precision change
+    memcpy( (char*)(op + lane_out + oNsimd*w), (char*)&otmp, sizeof(oscalar_type) );
+  }
+}
+
+
 NAMESPACE_END(Grid);
 
diff --git a/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
new file mode 100644
index 00000000..6de636bd
--- /dev/null
+++ b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
@@ -0,0 +1,918 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
+
+Copyright (C) 2015-2016
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+//Production binary for the 40ID G-parity ensemble
+
+struct RatQuoParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
+				  double, bnd_lo,
+				  double, bnd_hi,
+				  Integer, action_degree,
+				  double, action_tolerance,
+				  Integer, md_degree,
+				  double, md_tolerance,
+				  Integer, reliable_update_freq,
+				  Integer, bnd_check_freq);
+  RatQuoParameters() { 
+    bnd_lo = 1e-2;
+    bnd_hi = 30;
+    action_degree = 10;
+    action_tolerance = 1e-10;
+    md_degree = 10;
+    md_tolerance = 1e-8;
+    bnd_check_freq = 20;
+    reliable_update_freq = 50;
+  }
+
+  void Export(RationalActionParams &into) const{
+    into.lo = bnd_lo;
+    into.hi = bnd_hi;
+    into.action_degree = action_degree;
+    into.action_tolerance = action_tolerance;
+    into.md_degree = md_degree;
+    into.md_tolerance = md_tolerance;
+    into.BoundsCheckFreq = bnd_check_freq;
+  }
+};
+
+struct EOFAparameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
+				  OneFlavourRationalParams, rat_params,
+				  double, action_tolerance,
+				  double, action_mixcg_inner_tolerance,
+				  double, md_tolerance,
+				  double, md_mixcg_inner_tolerance);
+
+  EOFAparameters() { 
+    action_mixcg_inner_tolerance = 1e-8;
+    action_tolerance = 1e-10;
+    md_tolerance = 1e-8;
+    md_mixcg_inner_tolerance = 1e-8;
+
+    rat_params.lo = 1.0;
+    rat_params.hi = 25.0;
+    rat_params.MaxIter  = 50000;
+    rat_params.tolerance= 1.0e-9;
+    rat_params.degree   = 14;
+    rat_params.precision= 50;
+  }
+};
+
+struct EvolParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
+                                  Integer, StartTrajectory,
+                                  Integer, Trajectories,
+				  Integer, SaveInterval,
+				  Integer, Steps,
+				  RealD, TrajectoryLength,
+                                  bool, MetropolisTest,
+				  std::string, StartingType,
+				  std::vector<Integer>, GparityDirs,
+				  std::vector<EOFAparameters>, eofa_l,
+				  RatQuoParameters, rat_quo_s,
+				  RatQuoParameters, rat_quo_DSDR);
+
+  EvolParameters() {
+    //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
+    MetropolisTest    = false;
+    StartTrajectory   = 0;
+    Trajectories      = 50;
+    SaveInterval = 5;
+    StartingType      = "ColdStart";
+    GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
+    Steps = 5;
+    TrajectoryLength = 1.0;
+  }
+};
+
+bool fileExists(const std::string &fn){
+  std::ifstream f(fn);
+  return f.good();
+}
+
+
+
+
+struct LanczosParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
+				  double, alpha,
+				  double, beta,
+				  double, mu,
+				  int, ord,
+				  int, n_stop,
+				  int, n_want,
+				  int, n_use,
+				  double, tolerance);
+
+  LanczosParameters() {
+    alpha = 35;
+    beta = 5;
+    mu = 0;
+    ord = 100;
+    n_stop = 10;
+    n_want = 10;
+    n_use = 15;
+    tolerance = 1e-6;
+  }
+};
+
+
+
+template<typename FermionActionD, typename FermionFieldD>
+void computeEigenvalues(std::string param_file,
+			GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+			FermionActionD &action, GridParallelRNG &rng){
+  
+  LanczosParameters params;
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "LanczosParameters", params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "LanczosParameters", params);
+  }
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  action.ImportGauge(latt);
+
+  SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
+  PlainHermOp<FermionFieldD> hermop_wrap(hermop);
+  //ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
+  assert(params.mu == 0.0);
+
+  Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
+  FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
+
+  std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
+  ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 50000);
+
+  std::vector<RealD> eval(params.n_use);
+  std::vector<FermionFieldD> evec(params.n_use, rbGrid);
+  int Nconv;
+  IRL.calc(eval, evec, gauss_o, Nconv);
+
+  std::cout << "Eigenvalues:" << std::endl;
+  for(int i=0;i<params.n_want;i++){
+    std::cout << i << " " << eval[i] << std::endl;
+  }
+}
+
+
+//Check the quality of the RHMC approx
+//action_or_md toggles checking the action (0), MD (1) or both (2) setups
+template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
+void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+	       FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
+	       int inv_pow, const std::string &quark_descr, int action_or_md){
+  assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
+  
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  numOp.ImportGauge(latt);
+  denOp.ImportGauge(latt);
+
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
+  SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
+
+  PowerMethod<FermionFieldD> power_method;
+  RealD lambda_max;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
+
+  lambda_max = power_method(MdagM,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
+  lambda_max = power_method(VdagV,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  if(action_or_md == 0 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+
+  std::cout << "-------------------------------------------------------------------------------" << std::endl;
+
+  if(action_or_md == 1 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD); 
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+}
+
+
+template<typename FermionImplPolicy>
+void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+	       GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  RealD scale = std::sqrt(0.5);
+  gaussian(rng,eta); eta = eta * scale;
+
+  //Use the inbuilt check
+  EOFA.refresh(latt, eta);
+  EOFA.S(latt);
+  std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
+}
+
+
+template<typename FermionImplPolicy>
+class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
+  EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
+}
+
+//Applications of M^{-1} cost the same as M for EOFA!
+template<typename FermionImplPolicy>
+class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
+  EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
+}
+
+
+NAMESPACE_BEGIN(Grid);
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
+						    Integer maxinnerit, 
+						    Integer maxouterit, 
+						    GridBase* _sp_grid4, 
+						    GridBase* _sp_grid5, 
+						    FermionOperatorF &_FermOpF,
+						    FermionOperatorD &_FermOpD,
+						    SchurOperatorF   &_LinOpF,
+						    SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      InnerTolerance(tol), 
+      MaxInnerIterations(maxinnerit), 
+      MaxOuterIterations(maxouterit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.) 
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
+      MPCG.InnerTolerance = InnerTolerance;
+      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD Tolerance;
+    Integer MaxIterations;
+
+    RealD Delta; //reliable update parameter
+
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+    
+    MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol, 
+								  RealD delta,
+								  Integer maxit, 
+								  GridBase* _sp_grid4, 
+								  GridBase* _sp_grid5, 
+								  FermionOperatorF &_FermOpF,
+								  FermionOperatorD &_FermOpD,
+								  SchurOperatorF   &_LinOpF,
+								  SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      Delta(delta),
+      MaxIterations(maxit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5)
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+
+      ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
+      std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+
+NAMESPACE_END(Grid);
+
+
+
+
+
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  std::string param_file = "params.xml";
+  bool file_load_check = false;
+
+  std::string serial_seeds = "1 2 3 4 5";
+  std::string parallel_seeds = "6 7 8 9 10";
+
+  int i=1;
+  while(i < argc){
+    std::string sarg(argv[i]);
+    if(sarg == "--param_file"){
+      assert(i!=argc-1);
+      param_file = argv[i+1];
+      i+=2;
+    }else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
+      file_load_check = true;
+      i++;
+    }else if(sarg == "--set_seeds"){ //set the rng seeds. Expects two vector args, e.g.  --set_seeds 1.2.3.4 5.6.7.8
+      assert(i < argc-2);
+      std::vector<int> tmp;
+      GridCmdOptionIntVector(argv[i+1],tmp);
+      {
+	std::stringstream ss;
+	for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
+	ss << tmp.back();
+	serial_seeds = ss.str();
+      }
+      GridCmdOptionIntVector(argv[i+2],tmp);
+      {
+	std::stringstream ss;
+	for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
+	ss << tmp.back();
+	parallel_seeds = ss.str();
+      }
+      i+=3;
+      std::cout << GridLogMessage << "Set serial seeds to " << serial_seeds << std::endl;
+      std::cout << GridLogMessage << "Set parallel seeds to " << parallel_seeds << std::endl;
+      
+    }else{
+      i++;
+    }
+  }
+
+  
+  //Read the user parameters
+  EvolParameters user_params;
+  
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "Params", user_params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    {
+      Grid::XmlWriter wr(param_file + ".templ");
+      write(wr, "Params", user_params);
+    }
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+  //Check the parameters
+  if(user_params.GparityDirs.size() != Nd-1){
+    std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
+    exit(1);
+  }
+  for(int i=0;i<Nd-1;i++)
+    if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
+      std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
+      exit(1);
+    }
+
+
+  typedef GparityMobiusEOFAFermionD EOFAactionD;
+  typedef GparityMobiusFermionD FermionActionD;
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef GparityMobiusEOFAFermionF EOFAactionF;
+  typedef GparityMobiusFermionF FermionActionF;
+  typedef typename FermionActionF::Impl_t FermionImplPolicyF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  IntegratorParameters MD;
+  typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
+  MD.name    = std::string("MinimumNorm2");
+
+  // typedef ConjugateHMCRunnerD<ForceGradient> HMCWrapper;
+  // MD.name    = std::string("ForceGradient");
+  
+  MD.MDsteps = user_params.Steps;
+  MD.trajL   = user_params.TrajectoryLength;
+
+  typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
+  
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = user_params.StartTrajectory;
+  HMCparams.Trajectories     = user_params.Trajectories;
+  HMCparams.NoMetropolisUntil= 0;
+  HMCparams.StartingType     = user_params.StartingType;
+  HMCparams.MetropolisTest = user_params.MetropolisTest;
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix    = "ckpoint_rng";
+  CPparams.saveInterval  = user_params.SaveInterval;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = serial_seeds;
+  RNGpar.parallel_seeds = parallel_seeds;
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+  //aiming for ainv=1.723 GeV
+  //                                  me         bob
+  //Estimated  a(ml+mres) [40ID] = 0.001305    0.00131
+  //           a(mh+mres) [40ID] = 0.035910    0.03529
+  //Estimate Ls=12, b+c=2  mres~0.0011
+
+  //1/24/2022 initial mres measurement gives mres=0.001,  adjusted light quark mass to 0.0003 from 0.0001
+  
+  const int Ls      = 12;
+  Real beta         = 1.848;
+  Real light_mass   = 0.0003;
+  Real strange_mass = 0.0342;
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+  RealD mobius_scale = 2.; //b+c
+
+  RealD mob_bmc = 1.0;
+  RealD mob_b = (mobius_scale + mob_bmc)/2.;
+  RealD mob_c = (mobius_scale - mob_bmc)/2.;
+
+  std::cout << GridLogMessage
+	    << "Ensemble parameters:" << std::endl
+	    << "Ls=" << Ls << std::endl
+	    << "beta=" << beta << std::endl
+	    << "light_mass=" << light_mass << std::endl
+	    << "strange_mass=" << strange_mass << std::endl
+	    << "mobius_scale=" << mobius_scale << std::endl;
+  
+  //Setup the Grids
+  auto UGridD   = TheHMC.Resources.GetCartesian();
+  auto UrbGridD = TheHMC.Resources.GetRBCartesian();
+  auto FGridD     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
+  auto FrbGridD   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
+
+  GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
+
+  ConjugateIwasakiGaugeActionD GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(UGridD);
+  LatticeGaugeFieldF Uf(UGridF);
+ 
+  //Setup the BCs
+  FermionActionD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+
+  std::vector<int> dirs4(Nd);
+  for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
+  dirs4[Nd-1] = 0; //periodic gauge BC in time
+
+  GaugeImplPolicy::setDirections(dirs4); //gauge BC
+
+  //Run optional gauge field checksum checker and exit
+  if(file_load_check){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
+  ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
+  ActionLevel<HMCWrapper::Field> Level3(2); //gauge
+
+
+  /////////////////////////////////////////////////////////////
+  // Light EOFA action
+  // have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
+  /////////////////////////////////////////////////////////////
+  typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
+  typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
+  typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
+  typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
+
+
+  std::vector<RealD> eofa_light_masses = { light_mass ,  0.004,   0.016,   0.064,   0.256    };
+  std::vector<RealD> eofa_pv_masses =    { 0.004       , 0.016,   0.064,   0.256,   1.0      };
+  int n_light_hsb = 5;
+  assert(user_params.eofa_l.size() == n_light_hsb);
+  
+  EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
+
+  for(int i=0;i<n_light_hsb;i++){
+    RealD iml = eofa_light_masses[i];
+    RealD ipv = eofa_pv_masses[i];
+
+    EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+    EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+
+    EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
+    EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
+    
+    EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
+    EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
+
+#if 1
+    //Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance  as Delta for now
+    EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+    EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+#else
+    EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
+    std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
+#endif
+
+    EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
+							*LopD, *RopD, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*DerivMCG_L, *DerivMCG_R, 
+							user_params.eofa_l[i].rat_params, true);
+    EOFA_pfactions[i] = EOFA;
+    Level1.push_back(EOFA);
+  }
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
+
+  FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
+
+  RationalActionParams rat_act_params_s;
+  rat_act_params_s.inv_pow  = 4; // (M^dag M)^{1/4}
+  rat_act_params_s.precision= 60;
+  rat_act_params_s.MaxIter  = 50000;
+  user_params.rat_quo_s.Export(rat_act_params_s);
+  std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); 
+  DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); 
+  Level1.push_back(&Quotient_s);  
+
+  ///////////////////////////////////
+  // DSDR action
+  ///////////////////////////////////
+  RealD dsdr_mass=-1.8;   
+  //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
+  RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
+  RealD dsdr_epsilon_b = 0.5; 
+  GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
+  GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
+
+  GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
+  GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
+ 
+  RationalActionParams rat_act_params_DSDR;
+  rat_act_params_DSDR.inv_pow  = 2; // (M^dag M)^{1/2}
+  rat_act_params_DSDR.precision= 60;
+  rat_act_params_DSDR.MaxIter  = 50000;
+  user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
+  std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
+  Level2.push_back(&Quotient_DSDR);
+
+  /////////////////////////////////////////////////////////////
+  // Gauge action
+  /////////////////////////////////////////////////////////////
+  Level3.push_back(&GaugeAction);
+
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  TheHMC.TheAction.push_back(Level3);
+  std::cout << GridLogMessage << " Action complete "<< std::endl;
+
+
+  //Action tuning
+  bool 
+    tune_rhmc_s=false, eigenrange_s=false, 
+    tune_rhmc_DSDR=false, eigenrange_DSDR=false, 
+    check_eofa=false, 
+    upper_bound_eofa=false, lower_bound_eofa(false);
+
+  std::string lanc_params_s;
+  std::string lanc_params_DSDR;
+  int tune_rhmc_s_action_or_md;
+  int tune_rhmc_DSDR_action_or_md;
+  int eofa_which_hsb;
+
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--tune_rhmc_s"){
+      assert(i < argc-1);
+      tune_rhmc_s=true;
+      tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_s"){
+      assert(i < argc-1);
+      eigenrange_s=true;
+      lanc_params_s = argv[i+1];
+    }
+    else if(sarg == "--tune_rhmc_DSDR"){
+      assert(i < argc-1);
+      tune_rhmc_DSDR=true;
+      tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_DSDR"){
+      assert(i < argc-1);
+      eigenrange_DSDR=true;
+      lanc_params_DSDR = argv[i+1];
+    }
+    else if(sarg == "--check_eofa"){
+      assert(i < argc-1);
+      check_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
+      assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
+    }
+    else if(sarg == "--upper_bound_eofa"){
+      assert(i < argc-1);
+      upper_bound_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+    else if(sarg == "--lower_bound_eofa"){
+      assert(i < argc-1);
+      lower_bound_eofa = true;      
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+  }
+  if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
+    std::cout << GridLogMessage << "Running checks" << std::endl;
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+
+    //std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
+    //std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
+
+    if(check_eofa){
+      if(eofa_which_hsb >= 0){
+	std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+	checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+      }else{
+	for(int i=0;i<n_light_hsb;i++){
+	  std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
+	  checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	  std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
+	}
+      }
+    }	  
+    if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange",  tune_rhmc_s_action_or_md);
+    if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
+
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  //Run the HMC
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.Run();
+
+  std::cout << GridLogMessage << " Done" << std::endl;
+  Grid_finalize();
+  return 0;
+} // main
diff --git a/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
new file mode 100644
index 00000000..42f54edd
--- /dev/null
+++ b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
@@ -0,0 +1,873 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
+
+Copyright (C) 2015-2016
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+//Production binary for the 40ID G-parity ensemble
+
+struct RatQuoParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
+				  double, bnd_lo,
+				  double, bnd_hi,
+				  Integer, action_degree,
+				  double, action_tolerance,
+				  Integer, md_degree,
+				  double, md_tolerance,
+				  Integer, reliable_update_freq,
+				  Integer, bnd_check_freq);
+  RatQuoParameters() { 
+    bnd_lo = 1e-2;
+    bnd_hi = 30;
+    action_degree = 10;
+    action_tolerance = 1e-10;
+    md_degree = 10;
+    md_tolerance = 1e-8;
+    bnd_check_freq = 20;
+    reliable_update_freq = 50;
+  }
+
+  void Export(RationalActionParams &into) const{
+    into.lo = bnd_lo;
+    into.hi = bnd_hi;
+    into.action_degree = action_degree;
+    into.action_tolerance = action_tolerance;
+    into.md_degree = md_degree;
+    into.md_tolerance = md_tolerance;
+    into.BoundsCheckFreq = bnd_check_freq;
+  }
+};
+
+struct EOFAparameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
+				  OneFlavourRationalParams, rat_params,
+				  double, action_tolerance,
+				  double, action_mixcg_inner_tolerance,
+				  double, md_tolerance,
+				  double, md_mixcg_inner_tolerance);
+
+  EOFAparameters() { 
+    action_mixcg_inner_tolerance = 1e-8;
+    action_tolerance = 1e-10;
+    md_tolerance = 1e-8;
+    md_mixcg_inner_tolerance = 1e-8;
+
+    rat_params.lo = 1.0;
+    rat_params.hi = 25.0;
+    rat_params.MaxIter  = 10000;
+    rat_params.tolerance= 1.0e-9;
+    rat_params.degree   = 14;
+    rat_params.precision= 50;
+  }
+};
+
+struct EvolParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
+                                  Integer, StartTrajectory,
+                                  Integer, Trajectories,
+				  Integer, SaveInterval,
+				  Integer, Steps,
+				  RealD, TrajectoryLength,
+                                  bool, MetropolisTest,
+				  std::string, StartingType,
+				  std::vector<Integer>, GparityDirs,
+				  std::vector<EOFAparameters>, eofa_l,
+				  RatQuoParameters, rat_quo_s,
+				  RatQuoParameters, rat_quo_DSDR);
+
+  EvolParameters() {
+    //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
+    MetropolisTest    = false;
+    StartTrajectory   = 0;
+    Trajectories      = 50;
+    SaveInterval = 5;
+    StartingType      = "ColdStart";
+    GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
+    Steps = 5;
+    TrajectoryLength = 1.0;
+  }
+};
+
+bool fileExists(const std::string &fn){
+  std::ifstream f(fn);
+  return f.good();
+}
+
+
+
+
+struct LanczosParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
+				  double, alpha,
+				  double, beta,
+				  double, mu,
+				  int, ord,
+				  int, n_stop,
+				  int, n_want,
+				  int, n_use,
+				  double, tolerance);
+
+  LanczosParameters() {
+    alpha = 35;
+    beta = 5;
+    mu = 0;
+    ord = 100;
+    n_stop = 10;
+    n_want = 10;
+    n_use = 15;
+    tolerance = 1e-6;
+  }
+};
+
+
+
+template<typename FermionActionD, typename FermionFieldD>
+void computeEigenvalues(std::string param_file,
+			GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+			FermionActionD &action, GridParallelRNG &rng){
+  
+  LanczosParameters params;
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "LanczosParameters", params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "LanczosParameters", params);
+  }
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  action.ImportGauge(latt);
+
+  SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
+  PlainHermOp<FermionFieldD> hermop_wrap(hermop);
+  //ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
+  assert(params.mu == 0.0);
+
+  Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
+  FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
+
+  std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
+  ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000);
+
+  std::vector<RealD> eval(params.n_use);
+  std::vector<FermionFieldD> evec(params.n_use, rbGrid);
+  int Nconv;
+  IRL.calc(eval, evec, gauss_o, Nconv);
+
+  std::cout << "Eigenvalues:" << std::endl;
+  for(int i=0;i<params.n_want;i++){
+    std::cout << i << " " << eval[i] << std::endl;
+  }
+}
+
+
+//Check the quality of the RHMC approx
+//action_or_md toggles checking the action (0), MD (1) or both (2) setups
+template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
+void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+	       FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
+	       int inv_pow, const std::string &quark_descr, int action_or_md){
+  assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
+  
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  numOp.ImportGauge(latt);
+  denOp.ImportGauge(latt);
+
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
+  SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
+
+  PowerMethod<FermionFieldD> power_method;
+  RealD lambda_max;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
+
+  lambda_max = power_method(MdagM,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
+  lambda_max = power_method(VdagV,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  if(action_or_md == 0 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+
+  std::cout << "-------------------------------------------------------------------------------" << std::endl;
+
+  if(action_or_md == 1 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD); 
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+}
+
+
+template<typename FermionImplPolicy>
+void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+	       GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  RealD scale = std::sqrt(0.5);
+  gaussian(rng,eta); eta = eta * scale;
+
+  //Use the inbuilt check
+  EOFA.refresh(latt, eta);
+  EOFA.S(latt);
+  std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
+}
+
+
+template<typename FermionImplPolicy>
+class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
+  EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
+}
+
+//Applications of M^{-1} cost the same as M for EOFA!
+template<typename FermionImplPolicy>
+class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
+  EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
+}
+
+
+NAMESPACE_BEGIN(Grid);
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
+						    Integer maxinnerit, 
+						    Integer maxouterit, 
+						    GridBase* _sp_grid4, 
+						    GridBase* _sp_grid5, 
+						    FermionOperatorF &_FermOpF,
+						    FermionOperatorD &_FermOpD,
+						    SchurOperatorF   &_LinOpF,
+						    SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      InnerTolerance(tol), 
+      MaxInnerIterations(maxinnerit), 
+      MaxOuterIterations(maxouterit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.) 
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
+      MPCG.InnerTolerance = InnerTolerance;
+      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD Tolerance;
+    Integer MaxIterations;
+
+    RealD Delta; //reliable update parameter
+
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+    
+    MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol, 
+								  RealD delta,
+								  Integer maxit, 
+								  GridBase* _sp_grid4, 
+								  GridBase* _sp_grid5, 
+								  FermionOperatorF &_FermOpF,
+								  FermionOperatorD &_FermOpD,
+								  SchurOperatorF   &_LinOpF,
+								  SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      Delta(delta),
+      MaxIterations(maxit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5)
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+
+      ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
+      std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+
+NAMESPACE_END(Grid);
+
+
+
+
+
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  std::string param_file = "params.xml";
+  bool file_load_check = false;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--param_file"){
+      assert(i!=argc-1);
+      param_file = argv[i+1];
+    }else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
+      file_load_check = true;
+    }
+  }
+
+  //Read the user parameters
+  EvolParameters user_params;
+  
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "Params", user_params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    {
+      Grid::XmlWriter wr(param_file + ".templ");
+      write(wr, "Params", user_params);
+    }
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+  //Check the parameters
+  if(user_params.GparityDirs.size() != Nd-1){
+    std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
+    exit(1);
+  }
+  for(int i=0;i<Nd-1;i++)
+    if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
+      std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
+      exit(1);
+    }
+
+
+  typedef GparityMobiusEOFAFermionD EOFAactionD;
+  typedef GparityMobiusFermionD FermionActionD;
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef GparityMobiusEOFAFermionF EOFAactionF;
+  typedef GparityMobiusFermionF FermionActionF;
+  typedef typename FermionActionF::Impl_t FermionImplPolicyF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  IntegratorParameters MD;
+  typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
+  typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
+  MD.name    = std::string("MinimumNorm2");
+  MD.MDsteps = user_params.Steps;
+  MD.trajL   = user_params.TrajectoryLength;
+
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = user_params.StartTrajectory;
+  HMCparams.Trajectories     = user_params.Trajectories;
+  HMCparams.NoMetropolisUntil= 0;
+  HMCparams.StartingType     = user_params.StartingType;
+  HMCparams.MetropolisTest = user_params.MetropolisTest;
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix    = "ckpoint_rng";
+  CPparams.saveInterval  = user_params.SaveInterval;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+
+  //aiming for ainv=2.068             me          Bob
+  //Estimated  a(ml+mres) [48ID] = 0.001048    0.00104 
+  //           a(mh+mres) [48ID] = 0.028847    0.02805
+  //Estimate Ls=12, b+c=2  mres~0.0003
+
+  const int Ls      = 12;
+  Real beta         = 1.946;
+  Real light_mass   = 0.00074;   //0.00104 - mres_approx;
+  Real strange_mass = 0.02775;    //0.02805 - mres_approx
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+  RealD mobius_scale = 2.; //b+c
+
+  RealD mob_bmc = 1.0;
+  RealD mob_b = (mobius_scale + mob_bmc)/2.;
+  RealD mob_c = (mobius_scale - mob_bmc)/2.;
+
+  //Setup the Grids
+  auto UGridD   = TheHMC.Resources.GetCartesian();
+  auto UrbGridD = TheHMC.Resources.GetRBCartesian();
+  auto FGridD     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
+  auto FrbGridD   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
+
+  GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
+
+  ConjugateIwasakiGaugeActionD GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(UGridD);
+  LatticeGaugeFieldF Uf(UGridF);
+ 
+  //Setup the BCs
+  FermionActionD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+
+  std::vector<int> dirs4(Nd);
+  for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
+  dirs4[Nd-1] = 0; //periodic gauge BC in time
+
+  GaugeImplPolicy::setDirections(dirs4); //gauge BC
+
+  //Run optional gauge field checksum checker and exit
+  if(file_load_check){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
+  ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
+  ActionLevel<HMCWrapper::Field> Level3(2); //gauge
+
+
+  /////////////////////////////////////////////////////////////
+  // Light EOFA action
+  // have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
+  /////////////////////////////////////////////////////////////
+  typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
+  typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
+  typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
+  typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
+  
+  std::vector<RealD> eofa_light_masses = { light_mass ,  0.004,   0.016,   0.064,   0.256    };
+  std::vector<RealD> eofa_pv_masses =    { 0.004       , 0.016,   0.064,   0.256,   1.0      };
+  int n_light_hsb = 5;
+  assert(user_params.eofa_l.size() == n_light_hsb);
+  
+  EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
+
+  for(int i=0;i<n_light_hsb;i++){
+    RealD iml = eofa_light_masses[i];
+    RealD ipv = eofa_pv_masses[i];
+
+    EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+    EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+
+    EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
+    EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
+    
+    EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
+    EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
+
+#if 1
+    //Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance  as Delta for now
+    EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+    EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+#else
+    
+    EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
+    std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
+#endif
+
+    
+    EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
+							*LopD, *RopD, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*DerivMCG_L, *DerivMCG_R, 
+							user_params.eofa_l[i].rat_params, true);
+    EOFA_pfactions[i] = EOFA;
+    Level1.push_back(EOFA);
+  }
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
+
+  FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
+
+  RationalActionParams rat_act_params_s;
+  rat_act_params_s.inv_pow  = 4; // (M^dag M)^{1/4}
+  rat_act_params_s.precision= 60;
+  rat_act_params_s.MaxIter  = 10000;
+  user_params.rat_quo_s.Export(rat_act_params_s);
+  std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); 
+  DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); 
+  Level1.push_back(&Quotient_s);  
+
+  ///////////////////////////////////
+  // DSDR action
+  ///////////////////////////////////
+  RealD dsdr_mass=-1.8;   
+  //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
+  RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
+  RealD dsdr_epsilon_b = 0.5; 
+  GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
+  GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
+
+  GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
+  GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
+ 
+  RationalActionParams rat_act_params_DSDR;
+  rat_act_params_DSDR.inv_pow  = 2; // (M^dag M)^{1/2}
+  rat_act_params_DSDR.precision= 60;
+  rat_act_params_DSDR.MaxIter  = 10000;
+  user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
+  std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
+  Level2.push_back(&Quotient_DSDR);
+
+  /////////////////////////////////////////////////////////////
+  // Gauge action
+  /////////////////////////////////////////////////////////////
+  Level3.push_back(&GaugeAction);
+
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  TheHMC.TheAction.push_back(Level3);
+  std::cout << GridLogMessage << " Action complete "<< std::endl;
+
+
+  //Action tuning
+  bool 
+    tune_rhmc_s=false, eigenrange_s=false, 
+    tune_rhmc_DSDR=false, eigenrange_DSDR=false, 
+    check_eofa=false, 
+    upper_bound_eofa=false, lower_bound_eofa(false);
+
+  std::string lanc_params_s;
+  std::string lanc_params_DSDR;
+  int tune_rhmc_s_action_or_md;
+  int tune_rhmc_DSDR_action_or_md;
+  int eofa_which_hsb;
+
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--tune_rhmc_s"){
+      assert(i < argc-1);
+      tune_rhmc_s=true;
+      tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_s"){
+      assert(i < argc-1);
+      eigenrange_s=true;
+      lanc_params_s = argv[i+1];
+    }
+    else if(sarg == "--tune_rhmc_DSDR"){
+      assert(i < argc-1);
+      tune_rhmc_DSDR=true;
+      tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_DSDR"){
+      assert(i < argc-1);
+      eigenrange_DSDR=true;
+      lanc_params_DSDR = argv[i+1];
+    }
+    else if(sarg == "--check_eofa"){
+      assert(i < argc-1);
+      check_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
+      assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
+    }
+    else if(sarg == "--upper_bound_eofa"){
+      assert(i < argc-1);
+      upper_bound_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+    else if(sarg == "--lower_bound_eofa"){
+      assert(i < argc-1);
+      lower_bound_eofa = true;      
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+  }
+  if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
+    std::cout << GridLogMessage << "Running checks" << std::endl;
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+
+    //std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
+    //std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
+
+
+    if(check_eofa){
+      if(eofa_which_hsb >= 0){
+	std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+	checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+      }else{
+	for(int i=0;i<n_light_hsb;i++){
+	  std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
+	  checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	  std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
+	}
+      }
+    }	  
+    if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange",  tune_rhmc_s_action_or_md);
+    if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
+
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  //Run the HMC
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.Run();
+
+  std::cout << GridLogMessage << " Done" << std::endl;
+  Grid_finalize();
+  return 0;
+} // main
diff --git a/tests/IO/Test_field_array_io.cc b/tests/IO/Test_field_array_io.cc
new file mode 100644
index 00000000..51ea7893
--- /dev/null
+++ b/tests/IO/Test_field_array_io.cc
@@ -0,0 +1,184 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/IO/Test_field_array_io.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+//This test demonstrates and checks a single-file write of an arbitrary array of fields
+
+uint64_t writeHeader(const uint32_t size, const uint32_t checksum, const std::string &format, const std::string &file){
+  std::ofstream fout(file,std::ios::out|std::ios::in);
+  fout.seekp(0,std::ios::beg);
+  fout << std::setw(10) << size << std::endl;
+  fout << std::hex << std::setw(10) << checksum << std::endl;
+  fout << format << std::endl;
+  return fout.tellp();
+}
+ 
+uint64_t readHeader(uint32_t &size, uint32_t &checksum, std::string &format, const std::string &file){
+  std::ifstream fin(file);
+  std::string line;
+  getline(fin,line);
+  {
+    std::stringstream ss; ss <<line ; ss >> size;
+  }
+  getline(fin,line);
+  {
+    std::stringstream ss; ss <<line ; ss >> std::hex >> checksum;
+  }
+  getline(fin,format);
+  removeWhitespace(format);
+      
+  return fin.tellg();
+}
+ 
+template<typename FieldType>
+void writeFieldArray(const std::string &file, const std::vector<FieldType> &data){
+  typedef typename FieldType::vector_object vobj;
+  typedef typename FieldType::scalar_object sobj;
+  GridBase* grid = data[0].Grid(); //assume all fields have the same Grid
+  BinarySimpleMunger<sobj, sobj> munge; //straight copy
+
+  //We need a 2-pass header write, first to establish the size, the second pass writes the checksum
+  std::string format = getFormatString<typename FieldType::vector_object>();
+
+  uint64_t offset; //leave 64 bits for header
+  if ( grid->IsBoss() ) { 
+    NerscIO::truncate(file);
+    offset = writeHeader(data.size(), 0, format, file);
+  }
+  grid->Broadcast(0,(void *)&offset,sizeof(offset)); //use as a barrier
+
+  std::cout << "Data offset write " << offset << std::endl;
+  std::cout << "Data size write " << data.size() << std::endl;
+  uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj);
+  std::cout << "Field size = " << field_size << " B" << std::endl;
+
+  uint32_t checksum = 0;
+  for(int i=0;i<data.size();i++){
+    std::cout << "Data field write " << i << " offset " << offset << std::endl;
+    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
+    BinaryIO::writeLatticeObject<vobj,sobj>(const_cast<FieldType &>(data[i]),file,munge,offset,format,
+					    nersc_csum,scidac_csuma,scidac_csumb);
+    offset += field_size;
+    checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2);
+  }
+  std::cout << "Write checksum " << checksum << std::endl;
+
+  if ( grid->IsBoss() ) { 
+    writeHeader(data.size(), checksum, format, file);
+  }
+}
+
+
+template<typename FieldType>
+void readFieldArray(std::vector<FieldType> &data, const std::string &file){
+  typedef typename FieldType::vector_object vobj;
+  typedef typename FieldType::scalar_object sobj;
+  assert(data.size() > 0);
+  GridBase* grid = data[0].Grid(); //assume all fields have the same Grid
+  BinarySimpleUnmunger<sobj, sobj> munge; //straight copy
+  
+  uint32_t hdr_checksum, hdr_size;
+  std::string format;
+  uint64_t offset = readHeader(hdr_size, hdr_checksum, format, file);
+  
+  std::cout << "Data offset read " << offset << std::endl;  
+  std::cout << "Data size read " << hdr_size << std::endl;
+  assert(data.size() == hdr_size);
+
+  uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj);
+
+  uint32_t checksum = 0;
+
+  for(int i=0;i<data.size();i++){
+    std::cout << "Data field read " << i << " offset " << offset << std::endl;
+    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
+    BinaryIO::readLatticeObject<vobj,sobj>(data[i],file,munge,offset,format,
+					   nersc_csum,scidac_csuma,scidac_csumb);
+    offset += field_size;
+    checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2);
+  }
+
+  std::cout << "Header checksum " << hdr_checksum << std::endl;    
+  std::cout << "Read checksum " << checksum << std::endl;
+    
+
+  assert( hdr_checksum == checksum );
+}
+
+
+
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  Coordinate latt   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+
+  const int Ls=8;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt, simd_layout, mpi_layout);
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  typedef DomainWallFermionD::FermionField FermionField;
+
+  int nfield = 20;
+  std::vector<FermionField> data(nfield, FGrid);
+
+  for(int i=0;i<data.size();i++)
+    gaussian(RNG5, data[i]);
+  
+  std::string file = "test_field_array_io.0";
+  writeFieldArray(file, data);
+
+  std::vector<FermionField> data_r(nfield, FGrid);
+  readFieldArray(data_r, file);
+  
+  for(int i=0;i<nfield;i++){
+    FermionField diff = data_r[i] - data[i];
+    RealD norm_diff = norm2(diff);
+    std::cout << "Norm2 of difference between stored and loaded data index " << i << " : " << norm_diff << std::endl;
+  }
+  
+  std::cout << "Done" << std::endl;
+
+  Grid_finalize();
+}
diff --git a/tests/lanczos/Test_compressed_lanczos_gparity.cc b/tests/lanczos/Test_compressed_lanczos_gparity.cc
new file mode 100644
index 00000000..ca353d61
--- /dev/null
+++ b/tests/lanczos/Test_compressed_lanczos_gparity.cc
@@ -0,0 +1,485 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_compressed_lanczos_gparity.cc
+
+    Copyright (C) 2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Leans heavily on Christoph Lehner's code
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+/*
+ *  Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features 
+ *  in Grid that were intended to be used to support blocked Aggregates, from
+ */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
+
+using namespace std;
+using namespace Grid;
+
+//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata
+void readConfiguration(LatticeGaugeFieldD &U,
+		       const std::string &config,
+		       bool is_cps_cfg = false){
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+  typedef GaugeStatistics<ConjugateGimplD> GaugeStats;
+     
+  FieldMetaData header;
+  NerscIO::readConfiguration<GaugeStats>(U, header, config);
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
+}
+
+//Lanczos parameters in CPS conventions
+struct CPSLanczosParams : Serializable {
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, ch_ord,
+				  int, N_use,
+				  int, N_get,
+				  int, N_true_get,
+				  RealD, stop_rsd,
+				  int, maxits);
+
+  //Translations
+  ChebyParams getChebyParams() const{
+    ChebyParams out;
+    out.alpha = beta*beta; //aka lo
+    out.beta = alpha*alpha; //aka hi
+    out.Npoly = ch_ord+1;
+    return out;
+  }
+  int Nstop() const{ return N_true_get; }
+  int Nm() const{ return N_use; }
+  int Nk() const{ return N_get; }
+};
+
+//Maybe this class should be in the main library?
+template<class Fobj,class CComplex,int nbasis>
+class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
+{ 
+public:
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
+  typedef Lattice<Fobj>          FineField;
+
+  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
+			      LinearOperatorBase<FineField> &FineOp,
+			      int checkerboard) 
+    // Base constructor
+    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard) 
+  {};
+
+  void checkpointFine(std::string evecs_file,std::string evals_file)
+  {
+    assert(this->subspace.size()==nbasis);
+    emptyUserRecord record;
+    Grid::ScidacWriter WR(this->_FineGrid->IsBoss());
+    WR.open(evecs_file);
+    for(int k=0;k<nbasis;k++) {
+      WR.writeScidacFieldRecord(this->subspace[k],record);
+    }
+    WR.close();
+    
+    XmlWriter WRx(evals_file);
+    write(WRx,"evals",this->evals_fine);
+  }
+
+  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
+  {
+    this->evals_fine.resize(nbasis);
+    this->subspace.resize(nbasis,this->_FineGrid);
+    
+    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",this->evals_fine);
+
+    if(this->evals_fine.size() < nbasis) assert(0 && "Not enough fine evals to complete basis");
+    if(this->evals_fine.size() > nbasis){ //allow the use of precomputed evecs with a larger #evecs
+      std::cout << GridLogMessage << "Truncating " << this->evals_fine.size() << " evals to basis size " << nbasis << std::endl;
+      this->evals_fine.resize(nbasis);
+    }     
+    
+    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
+    emptyUserRecord record;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<nbasis;k++) {
+      this->subspace[k].Checkerboard()=this->_checkerboard;
+      RD.readScidacFieldRecord(this->subspace[k],record);
+      
+    }
+    RD.close();
+  }
+
+  void checkpointCoarse(std::string evecs_file,std::string evals_file)
+  {
+    int n = this->evec_coarse.size();
+    emptyUserRecord record;
+    Grid::ScidacWriter WR(this->_CoarseGrid->IsBoss());
+    WR.open(evecs_file);
+    for(int k=0;k<n;k++) {
+      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
+    }
+    WR.close();
+    
+    XmlWriter WRx(evals_file);
+    write(WRx,"evals",this->evals_coarse);
+  }
+
+  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
+  {
+    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
+    this->evals_coarse.resize(nvec);
+    this->evec_coarse.resize(nvec,this->_CoarseGrid);
+    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",this->evals_coarse);
+
+    assert(this->evals_coarse.size()==nvec);
+    emptyUserRecord record;
+    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<nvec;k++) {
+      RD.readScidacFieldRecord(this->evec_coarse[k],record);
+    }
+    RD.close();
+  }
+};
+
+struct Options{
+  std::vector<int> blockSize;
+  std::vector<int> GparityDirs;
+  int Ls;
+  RealD mass;
+  RealD M5;
+  RealD mobius_scale;
+  std::string config;
+  bool is_cps_cfg;
+
+  double coarse_relax_tol;
+  int smoother_ord;
+  
+  CPSLanczosParams fine;
+  CPSLanczosParams coarse;
+
+  bool write_fine = false;
+  std::string write_fine_file;
+
+  bool read_fine = false;
+  std::string read_fine_file;
+
+  bool write_coarse = false;
+  std::string write_coarse_file;
+
+  bool read_coarse = false;
+  std::string read_coarse_file;
+
+  
+  Options(){
+    blockSize = std::vector<int> ({2,2,2,2,2});
+    GparityDirs = std::vector<int> ({1,1,1}); //1 for each GP direction
+    
+    Ls = 12;
+    mass = 0.01;
+    M5 = 1.8;
+    is_cps_cfg = false;
+    mobius_scale = 2.0;
+    
+    fine.alpha = 2;
+    fine.beta = 0.1;
+    fine.ch_ord = 100;
+    fine.N_use = 70;
+    fine.N_get = 60;
+    fine.N_true_get = 60;
+    fine.stop_rsd = 1e-8;
+    fine.maxits = 10000;
+
+    coarse.alpha = 2;
+    coarse.beta = 0.1;
+    coarse.ch_ord = 100;
+    coarse.N_use = 200;
+    coarse.N_get = 190;
+    coarse.N_true_get = 190;
+    coarse.stop_rsd = 1e-8;
+    coarse.maxits = 10000;
+
+    coarse_relax_tol = 1e5;
+    smoother_ord = 20;
+
+    write_fine = false;
+    read_fine = false;
+    write_coarse = false;
+    read_coarse = false;
+  }
+};  
+
+template<int nbasis>
+void runTest(const Options &opt){
+	        //Fine grids
+  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  GridDefaultSimd(Nd,vComplex::Nsimd()),   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(opt.Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(opt.Ls,UGrid);
+
+  //Setup G-parity BCs
+  assert(Nd == 4);
+  std::vector<int> dirs4(4);
+  for(int i=0;i<3;i++) dirs4[i] = opt.GparityDirs[i];
+  dirs4[3] = 0; //periodic gauge BC in time
+  
+  std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
+  ConjugateGimplD::setDirections(dirs4); //gauge BC
+
+  GparityWilsonImplD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = opt.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+  std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
+  
+  //Read the gauge field
+  LatticeGaugeField Umu(UGrid);  
+  readConfiguration(Umu, opt.config, opt.is_cps_cfg);
+
+  //Setup the coarse grids  
+  auto fineLatt     = GridDefaultLatt();
+  Coordinate coarseLatt(4);
+  for (int d=0;d<4;d++){
+    coarseLatt[d] = fineLatt[d]/opt.blockSize[d];    assert(coarseLatt[d]*opt.blockSize[d]==fineLatt[d]);
+  }
+
+  std::cout << GridLogMessage<< " 5d coarse lattice is ";
+  for (int i=0;i<4;i++){
+    std::cout << coarseLatt[i]<<"x";
+  } 
+  int cLs = opt.Ls/opt.blockSize[4]; assert(cLs*opt.blockSize[4]==opt.Ls);
+  std::cout << cLs<<std::endl;
+  
+  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
+  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
+
+  //Dirac operator
+  double bmc =  1.;      
+  double b = (opt.mobius_scale + bmc)/2.;  // b = 1/2 [ (b+c) + (b-c) ]
+  double c = (opt.mobius_scale - bmc)/2.;  // c = 1/2 [ (b+c) - (b-c) ]
+  
+  GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, opt.mass, opt.M5, b,c,Params);
+  typedef GparityMobiusFermionD::FermionField FermionField;
+  
+  SchurDiagTwoOperator<GparityMobiusFermionD, FermionField> SchurOp(action);
+
+  typedef GparityWilsonImplD::SiteSpinor SiteSpinor;
+
+  const CPSLanczosParams &fine = opt.fine;
+  const CPSLanczosParams &coarse = opt.coarse;
+
+  std::cout << GridLogMessage << "Keep " << fine.N_true_get   << " fine   vectors" << std::endl;
+  std::cout << GridLogMessage << "Keep " << coarse.N_true_get << " coarse vectors" << std::endl;
+  assert(coarse.N_true_get >= fine.N_true_get);
+
+  assert(nbasis<=fine.N_true_get);
+  LocalCoherenceLanczosScidac<SiteSpinor,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,SchurOp,Odd);
+  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
+ 
+  //Compute and/or read fine evecs
+  if(opt.read_fine){
+    _LocalCoherenceLanczos.checkpointFineRestore(opt.read_fine_file + "_evecs.scidac", opt.read_fine_file + "_evals.xml");
+  }else{
+    std::cout << GridLogMessage << "Performing fine grid IRL" << std::endl;
+    std::cout << GridLogMessage << "Using Chebyshev alpha=" << fine.alpha << " beta=" << fine.beta << " ord=" << fine.ch_ord << std::endl;
+    _LocalCoherenceLanczos.calcFine(fine.getChebyParams(),
+				    fine.Nstop(),fine.Nk(),fine.Nm(),
+				    fine.stop_rsd,fine.maxits,0,0);
+    if(opt.write_fine){
+      std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
+      _LocalCoherenceLanczos.checkpointFine(opt.write_fine_file + "_evecs.scidac", opt.write_fine_file + "_evals.xml");
+    }
+  }
+  
+  //Block orthonormalise (this should be part of calcFine?)
+  std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
+  _LocalCoherenceLanczos.Orthogonalise();
+  std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
+
+  ChebyParams smoother = fine.getChebyParams();
+  smoother.Npoly = opt.smoother_ord+1;
+
+  if(opt.read_coarse){
+    _LocalCoherenceLanczos.checkpointCoarseRestore(opt.read_coarse_file + "_evecs.scidac", opt.read_coarse_file + "_evals.xml",coarse.Nstop());
+
+  }else{
+    std::cout << GridLogMessage << "Performing coarse grid IRL" << std::endl;
+    std::cout << GridLogMessage << "Using Chebyshev alpha=" << coarse.alpha << " beta=" << coarse.beta << " ord=" << coarse.ch_ord << std::endl;	
+    _LocalCoherenceLanczos.calcCoarse(coarse.getChebyParams(), smoother, opt.coarse_relax_tol,
+				      coarse.Nstop(), coarse.Nk() ,coarse.Nm(),
+				      coarse.stop_rsd, coarse.maxits, 
+				      0,0);
+
+    if(opt.write_coarse){
+      std::cout << GridLogIRL<<"Checkpointing Coarse evecs"<<std::endl;
+      _LocalCoherenceLanczos.checkpointCoarse(opt.write_coarse_file + "_evecs.scidac", opt.write_coarse_file + "_evals.xml");
+    }
+
+  }
+
+  //Test the eigenvectors
+  //To remove high-frequency noise we apply a Chebyshev smoothing
+  Chebyshev<FermionField> cheb_smoother(smoother);
+    
+  FermionField evec(FrbGrid);
+  FermionField evec_sm(FrbGrid); //smoothed
+  FermionField tmp(FrbGrid);
+  RealD eval;
+  
+  for(int i=0;i<coarse.N_true_get;i++){    
+    _LocalCoherenceLanczos.getFineEvecEval(evec, eval, i);
+
+    //Check unsmoothed evec
+    SchurOp.HermOp(evec, tmp);
+    tmp = tmp - eval*evec;
+    RealD norm_unsmoothed = sqrt(norm2(tmp));
+    
+    //Check smoothed evec
+    cheb_smoother(SchurOp, evec, evec_sm);   
+    SchurOp.HermOp(evec_sm, tmp);
+    tmp = tmp - eval*evec_sm;
+    RealD norm_smoothed = sqrt(norm2(tmp));
+    
+    std::cout << GridLogMessage << "Eval " << eval << " unsmoothed resid " << norm_unsmoothed << " smoothed resid " << norm_smoothed << std::endl;
+  }
+}
+
+
+//Note:  because we rely upon physical properties we must use a "real" gauge configuration
+int main (int argc, char ** argv) {
+  Grid_init(&argc,&argv);
+  GridLogIRL.TimingMode(1);
+
+  Options opt;
+  int basis_size = 100;
+  
+  if(argc < 3){
+    std::cout << GridLogMessage << "Usage: <exe> <config> <gparity dirs> <options>" << std::endl;
+    std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
+    std::cout << GridLogMessage << "Options:" << std::endl;
+    std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
+    std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
+    std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents  (default 2.2.2.2.2)" << std::endl;
+    std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
+    std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_coarse <filename>: Real the parameters file for the coarse Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--write_coarse <filename stub>: Write coarse evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_coarse <filename stub>: Read coarse evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--smoother_ord :  Set the Chebyshev order of the smoother (default 20)" << std::endl;
+    std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
+    std::cout << GridLogMessage << "--basis_size : Select the basis size from 100,200,300,350 (default 100)" << std::endl;
+    Grid_finalize();
+    return 1;
+  }
+  opt.config = argv[1];
+  GridCmdOptionIntVector(argv[2], opt.GparityDirs);
+  assert(opt.GparityDirs.size() == 3);
+
+  for(int i=3;i<argc;i++){
+    std::string sarg = argv[i];
+    if(sarg == "--Ls"){
+      opt.Ls = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set Ls to " << opt.Ls << std::endl;
+    }else if(sarg == "--mass"){
+      std::istringstream ss(argv[i+1]); ss >> opt.mass;
+      std::cout << GridLogMessage << "Set quark mass to " << opt.mass << std::endl;
+    }else if(sarg == "--block"){
+      GridCmdOptionIntVector(argv[i+1], opt.blockSize);
+      assert(opt.blockSize.size() == 5);
+      std::cout << GridLogMessage << "Set block size to ";
+      for(int q=0;q<5;q++) std::cout << opt.blockSize[q] << " ";
+      std::cout << std::endl;      
+    }else if(sarg == "--is_cps_cfg"){
+      opt.is_cps_cfg = true;
+    }else if(sarg == "--write_irl_templ"){
+      XmlWriter writer("irl_templ.xml");
+      write(writer,"Params", opt.fine);
+      Grid_finalize();
+      return 0;
+    }else if(sarg == "--read_irl_fine"){
+      std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", opt.fine);
+    }else if(sarg == "--read_irl_coarse"){
+      std::cout << GridLogMessage << "Reading coarse IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", opt.coarse);
+    }else if(sarg == "--write_fine"){
+      opt.write_fine = true;
+      opt.write_fine_file = argv[i+1];
+    }else if(sarg == "--read_fine"){
+      opt.read_fine = true;
+      opt.read_fine_file = argv[i+1];
+    }else if(sarg == "--write_coarse"){
+      opt.write_coarse = true;
+      opt.write_coarse_file = argv[i+1];
+    }else if(sarg == "--read_coarse"){
+      opt.read_coarse = true;
+      opt.read_coarse_file = argv[i+1];
+    }else if(sarg == "--smoother_ord"){
+      std::istringstream ss(argv[i+1]); ss >> opt.smoother_ord;
+      std::cout << GridLogMessage << "Set smoother order to " << opt.smoother_ord << std::endl;
+    }else if(sarg == "--coarse_relax_tol"){
+      std::istringstream ss(argv[i+1]); ss >> opt.coarse_relax_tol;
+      std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << opt.coarse_relax_tol << std::endl;
+    }else if(sarg == "--mobius_scale"){
+      std::istringstream ss(argv[i+1]); ss >> opt.mobius_scale;
+      std::cout << GridLogMessage << "Set Mobius scale to " << opt.mobius_scale << std::endl;
+    }else if(sarg == "--basis_size"){
+      basis_size = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set basis size to " << basis_size << std::endl;
+    }
+  }
+
+  switch(basis_size){
+  case 100:
+    runTest<100>(opt); break;
+  case 200:
+    runTest<200>(opt); break;
+  case 300:
+    runTest<300>(opt); break;
+  case 350:
+    runTest<350>(opt); break;
+  default:
+    std::cout << GridLogMessage << "Unsupported basis size " << basis_size << std::endl;
+    assert(0);
+  }
+  
+  Grid_finalize();
+}
+
diff --git a/tests/lanczos/Test_evec_compression.cc b/tests/lanczos/Test_evec_compression.cc
new file mode 100644
index 00000000..1636ea3a
--- /dev/null
+++ b/tests/lanczos/Test_evec_compression.cc
@@ -0,0 +1,582 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_evec_compression.cc
+
+    Copyright (C) 2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+/*
+ *
+ * This test generates eigenvectors using the Lanczos algorithm then attempts to use local coherence compression
+ * to express those vectors in terms of a basis formed from a subset. This test is useful for finding the optimal
+ * blocking and basis size for performing a Local Coherence Lanczos
+ */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
+
+using namespace std;
+using namespace Grid;
+
+//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata
+template<typename Gimpl>
+void readConfiguration(LatticeGaugeFieldD &U,
+		       const std::string &config,
+		       bool is_cps_cfg = false){
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+  typedef GaugeStatistics<Gimpl> GaugeStats;
+     
+  FieldMetaData header;
+  NerscIO::readConfiguration<GaugeStats>(U, header, config);
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
+}
+
+//Lanczos parameters in CPS conventions
+struct CPSLanczosParams : Serializable {
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, ch_ord,
+				  int, N_use,
+				  int, N_get,
+				  int, N_true_get,
+				  RealD, stop_rsd,
+				  int, maxits);
+
+  //Translations
+  ChebyParams getChebyParams() const{
+    ChebyParams out;
+    out.alpha = beta*beta; //aka lo
+    out.beta = alpha*alpha; //aka hi
+    out.Npoly = ch_ord+1;
+    return out;
+  }
+  int Nstop() const{ return N_true_get; }
+  int Nm() const{ return N_use; }
+  int Nk() const{ return N_get; }
+};
+
+
+template<class Fobj,class CComplex,int nbasis>
+class LocalCoherenceCompressor{
+public:
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<Fobj>                       FineField;
+  
+  void compress(std::vector<FineField> &basis,
+		std::vector<CoarseField> &compressed_evecs,
+		const std::vector<FineField> &evecs_in,
+		GridBase *FineGrid,
+		GridBase *CoarseGrid){
+    int nevecs = evecs_in.size();
+    assert(nevecs > nbasis);
+    
+    //Construct the basis
+    basis.resize(nbasis, FineGrid);
+    for(int b=0;b<nbasis;b++) basis[b] = evecs_in[b];
+
+    //Block othornormalize basis
+    CoarseScalar InnerProd(CoarseGrid);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
+    blockOrthogonalise(InnerProd,basis);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
+    blockOrthogonalise(InnerProd,basis);
+
+    //The coarse grid representation is the field of vectors of block inner products
+    std::cout << GridLogMessage << "Compressing eigevectors" << std::endl;
+    compressed_evecs.resize(nevecs, CoarseGrid);
+    for(int i=0;i<nevecs;i++) blockProject(compressed_evecs[i], evecs_in[i], basis);
+    std::cout << GridLogMessage << "Compression complete" << std::endl;
+  }
+
+  void uncompress(FineField &evec, const int i, const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs) const{
+    blockPromote(compressed_evecs[i],evec,basis);  
+  }
+
+  //Test uncompressed eigenvectors of Linop.HermOp to precision 'base_tolerance' for i<nbasis and 'base_tolerance*relax' for i>=nbasis
+  //Because the uncompressed evec has a lot of high mode noise (unimportant for deflation) we apply a smoother before testing.
+  //The Chebyshev used by the Lanczos should be sufficient as a smoother
+  bool testCompression(LinearOperatorBase<FineField> &Linop, OperatorFunction<FineField>   &smoother,
+		       const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<RealD> &evals,
+		       const RealD base_tolerance, const RealD relax){
+    std::cout << GridLogMessage << "Testing quality of uncompressed evecs (after smoothing)" << std::endl;
+   
+    GridBase* FineGrid = basis[0].Grid();
+    GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+    bool fail = false;
+    FineField evec(FineGrid), Mevec(FineGrid), evec_sm(FineGrid);
+    for(int i=0;i<compressed_evecs.size();i++){
+      std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
+      uncompress(evec, i, basis, compressed_evecs);
+
+      std::cout << GridLogMessage << "Smoothing evec " << i << std::endl;
+      smoother(Linop, evec, evec_sm);
+      
+      std::cout << GridLogMessage << "Computing residual for evec " << i << std::endl;
+      std::cout << GridLogMessage << "Linop" << std::endl;
+      Linop.HermOp(evec_sm, Mevec);
+      std::cout << GridLogMessage << "Linalg" << std::endl;
+      Mevec = Mevec - evals[i]*evec_sm;
+
+      std::cout << GridLogMessage << "Resid" << std::endl;
+      RealD tol = base_tolerance * (i<nbasis ? 1. : relax);
+      RealD res = sqrt(norm2(Mevec));
+      std::cout << GridLogMessage << "Evec idx " << i << " res " << res << " tol " << tol << std::endl;
+      if(res > tol) fail = true;
+    }
+    return fail;
+  }
+
+  //Compare uncompressed evecs to original evecs
+  void compareEvecs(const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<FineField> &orig_evecs){
+    std::cout << GridLogMessage << "Comparing uncompressed evecs to original evecs" << std::endl;
+    
+    GridBase* FineGrid = basis[0].Grid();
+    GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+    FineField evec(FineGrid), diff(FineGrid);
+    for(int i=0;i<compressed_evecs.size();i++){
+      std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
+      uncompress(evec, i, basis, compressed_evecs);
+      diff = orig_evecs[i] - evec;
+      RealD res = sqrt(norm2(diff));
+      std::cout << GridLogMessage << "Evec idx " << i << " res " << res << std::endl;
+    }
+  }
+  
+};
+
+template<class Fobj,class CComplex,int nbasis>
+void compareBlockPromoteTimings(const std::vector<Lattice<Fobj> > &basis, const std::vector<Lattice<iVector<CComplex,nbasis > > > &compressed_evecs){
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar; 
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<Fobj>                       FineField;
+
+  GridStopWatch timer;
+  
+  GridBase* FineGrid = basis[0].Grid();
+  GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+  FineField v1(FineGrid), v2(FineGrid);
+
+  //Start with a cold start
+  for(int i=0;i<basis.size();i++){
+    autoView( b_ , basis[i], CpuWrite);
+  }
+  for(int i=0;i<compressed_evecs.size();i++){
+    autoView( b_ , compressed_evecs[i], CpuWrite);
+  }
+  {
+    autoView( b_, v1, CpuWrite );
+  }
+
+  timer.Start();
+  blockPromote(compressed_evecs[0],v1,basis);  
+  timer.Stop();
+  std::cout << GridLogMessage << "Time for cold blockPromote v1 " << timer.Elapsed() << std::endl;
+
+  //Test to ensure it is actually doing a cold start by repeating
+  for(int i=0;i<basis.size();i++){
+    autoView( b_ , basis[i], CpuWrite);
+  }
+  for(int i=0;i<compressed_evecs.size();i++){
+    autoView( b_ , compressed_evecs[i], CpuWrite);
+  }
+  {
+    autoView( b_, v1, CpuWrite );
+  }
+
+  timer.Reset();
+  timer.Start();
+  blockPromote(compressed_evecs[0],v1,basis);  
+  timer.Stop();
+  std::cout << GridLogMessage << "Time for cold blockPromote v1 repeat (should be the same as above) " << timer.Elapsed() << std::endl;
+}
+
+struct Args{
+  int Ls;
+  RealD mass;
+  RealD M5;
+  bool is_cps_cfg;
+  RealD mobius_scale; //b+c
+  
+  CPSLanczosParams fine;
+  double coarse_relax_tol;
+
+  std::vector<int> blockSize;
+  std::vector<int> GparityDirs;
+
+  bool write_fine;
+  std::string write_fine_file;
+  bool read_fine;
+  std::string read_fine_file;
+
+  int basis_size;
+  
+  Args(){
+    blockSize = {2,2,2,2,2};
+    GparityDirs = {1,1,1}; //1 for each GP direction
+    
+    Ls = 12;
+    mass = 0.01;
+    M5 = 1.8;
+    is_cps_cfg = false;
+    mobius_scale = 2;
+    
+    fine.alpha = 2;
+    fine.beta = 0.1;
+    fine.ch_ord = 100;
+    fine.N_use = 70;
+    fine.N_get = 60;
+    fine.N_true_get = 60;
+    fine.stop_rsd = 1e-8;
+    fine.maxits = 10000;
+
+    coarse_relax_tol = 1e5;
+
+    write_fine = false;
+    read_fine = false;
+
+    basis_size = 100;
+  }
+};
+    
+
+GparityWilsonImplD::ImplParams setupGparityParams(const std::vector<int> &GparityDirs){
+  //Setup G-parity BCs
+  assert(Nd == 4);
+  std::vector<int> dirs4(4);
+  for(int i=0;i<3;i++) dirs4[i] = GparityDirs[i];
+  dirs4[3] = 0; //periodic gauge BC in time
+  
+  std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
+  ConjugateGimplD::setDirections(dirs4); //gauge BC
+
+  GparityWilsonImplD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+  std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
+  return Params;
+}
+
+WilsonImplD::ImplParams setupParams(){
+  WilsonImplD::ImplParams Params;
+  Complex one(1.0);
+  Complex mone(-1.0);
+  for(int i=0;i<Nd-1;i++) Params.boundary_phases[i] = one;
+  Params.boundary_phases[Nd-1] = mone;
+  return Params;
+}
+
+template<int nbasis, typename ActionType>
+void run_b(ActionType &action, const std::string &config, const Args &args){
+  //Fine grids
+  GridCartesian         * UGrid     = (GridCartesian*)action.GaugeGrid();
+  GridRedBlackCartesian * UrbGrid   = (GridRedBlackCartesian*)action.GaugeRedBlackGrid();
+  GridCartesian         * FGrid     = (GridCartesian*)action.FermionGrid();
+  GridRedBlackCartesian * FrbGrid   = (GridRedBlackCartesian*)action.FermionRedBlackGrid();
+
+  //Setup the coarse grids  
+  auto fineLatt     = GridDefaultLatt();
+  Coordinate coarseLatt(4);
+  for (int d=0;d<4;d++){
+    coarseLatt[d] = fineLatt[d]/args.blockSize[d];    assert(coarseLatt[d]*args.blockSize[d]==fineLatt[d]);
+  }
+
+  std::cout << GridLogMessage<< " 5d coarse lattice is ";
+  for (int i=0;i<4;i++){
+    std::cout << coarseLatt[i]<<"x";
+  } 
+  int cLs = args.Ls/args.blockSize[4]; assert(cLs*args.blockSize[4]==args.Ls);
+  std::cout << cLs<<std::endl;
+  
+  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
+  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
+  typedef vTComplex CComplex; 
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar;
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+
+  typedef typename ActionType::FermionField FermionField; 
+  
+  SchurDiagTwoOperator<ActionType,FermionField> SchurOp(action);
+
+  typedef typename ActionType::SiteSpinor SiteSpinor;
+
+  const CPSLanczosParams &fine = args.fine;
+  
+  //Do the fine Lanczos
+  std::vector<RealD> evals;
+  std::vector<FermionField> evecs;
+
+  if(args.read_fine){
+    evals.resize(fine.N_true_get);
+    evecs.resize(fine.N_true_get, FrbGrid);
+
+    std::string evals_file = args.read_fine_file + "_evals.xml";
+    std::string evecs_file = args.read_fine_file + "_evecs.scidac";
+    
+    std::cout << GridLogIRL<< "Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",evals);
+    
+    assert(evals.size()==fine.N_true_get);
+    
+    std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
+    emptyUserRecord record;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<fine.N_true_get;k++) {
+      evecs[k].Checkerboard()=Odd;
+      RD.readScidacFieldRecord(evecs[k],record);
+      
+    }
+    RD.close();
+  }else{ 
+    int Nstop = fine.Nstop(); //==N_true_get
+    int Nm = fine.Nm();
+    int Nk = fine.Nk();
+    RealD resid = fine.stop_rsd;
+    int MaxIt = fine.maxits;
+    
+    assert(nbasis<=Nm);    
+    Chebyshev<FermionField>      Cheby(fine.getChebyParams());
+    FunctionHermOp<FermionField> ChebyOp(Cheby,SchurOp);
+    PlainHermOp<FermionField>    Op(SchurOp);
+
+    evals.resize(Nm);
+    evecs.resize(Nm,FrbGrid);
+    
+    ImplicitlyRestartedLanczos<FermionField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,0,0);
+
+    FermionField src(FrbGrid); 
+    typedef typename FermionField::scalar_type Scalar;
+    src=Scalar(1.0); 
+    src.Checkerboard() = Odd;
+
+    int Nconv;
+    IRL.calc(evals, evecs,src,Nconv,false);
+    if(Nconv < Nstop) assert(0 && "Fine lanczos failed to converge the required number of evecs"); //algorithm doesn't consider this a failure
+    if(Nconv > Nstop){
+      //Yes this potentially throws away some evecs but it is better than having a random number of evecs between Nstop and Nm!
+      evals.resize(Nstop);
+      evecs.resize(Nstop, FrbGrid);
+    }
+    
+    if(args.write_fine){
+      std::string evals_file = args.write_fine_file + "_evals.xml";
+      std::string evecs_file = args.write_fine_file + "_evecs.scidac";
+
+      std::cout << GridLogIRL<< "Writing evecs to "<<evecs_file<<std::endl;
+
+      emptyUserRecord record;
+      Grid::ScidacWriter WR(FrbGrid->IsBoss());
+      WR.open(evecs_file);
+      for(int k=0;k<evecs.size();k++) {
+	WR.writeScidacFieldRecord(evecs[k],record);
+      }
+      WR.close();
+
+      std::cout << GridLogIRL<< "Writing evals to "<<evals_file<<std::endl;
+      
+      XmlWriter WRx(evals_file);
+      write(WRx,"evals",evals);
+    }    
+  }
+    
+  //Do the compression
+  LocalCoherenceCompressor<SiteSpinor,vTComplex,nbasis> compressor;
+  std::vector<FermionField> basis(nbasis,FrbGrid);
+  std::vector<CoarseField> compressed_evecs(evecs.size(),CoarseGrid5);
+  
+  compressor.compress(basis, compressed_evecs, evecs, FrbGrid, CoarseGrid5);
+
+  compareBlockPromoteTimings(basis, compressed_evecs);
+
+  //Compare uncompressed and original evecs
+  compressor.compareEvecs(basis, compressed_evecs, evecs);
+  
+  //Create the smoother
+  Chebyshev<FermionField> smoother(fine.getChebyParams());
+  
+  //Test the quality of the uncompressed evecs
+  assert( compressor.testCompression(SchurOp, smoother, basis, compressed_evecs, evals, fine.stop_rsd, args.coarse_relax_tol) );   
+}
+
+template<typename ActionType>
+void run(ActionType &action, const std::string &config, const Args &args){
+  switch(args.basis_size){
+  case 50:
+    return run_b<50>(action,config,args);
+  case 100:
+    return run_b<100>(action,config,args);
+  case 150:
+    return run_b<150>(action,config,args);
+  case 200:
+    return run_b<200>(action,config,args);
+  case 250:
+    return run_b<250>(action,config,args);
+  case 300:
+    return run_b<300>(action,config,args);
+  case 350:
+    return run_b<350>(action,config,args);
+  case 400:
+    return run_b<400>(action,config,args);
+  default:
+    assert(0 && "Unsupported basis size: allowed values are 50,100,200,250,300,350,400");
+  }
+}
+
+
+
+
+//Note:  because we rely upon physical properties we must use a "real" gauge configuration
+int main (int argc, char ** argv) {
+  Grid_init(&argc,&argv);
+  GridLogIRL.TimingMode(1);
+
+  if(argc < 3){
+    std::cout << GridLogMessage << "Usage: <exe> <config file> <gparity dirs> <options>" << std::endl;
+    std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
+    std::cout << GridLogMessage << "Options:" << std::endl;
+    std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
+    std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
+    std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents  (default 2.2.2.2.2)" << std::endl;
+    std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
+    std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;    
+    std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
+    std::cout << GridLogMessage << "--action : Set the action from 'DWF', 'Mobius'  (default Mobius)" << std::endl;
+    std::cout << GridLogMessage << "--mobius_scale : Set the Mobius scale b+c (default 2)" << std::endl;
+    std::cout << GridLogMessage << "--basis_size : Set the basis size from 50,100,150,200,250,300,350,400 (default 100)" << std::endl;
+
+    Grid_finalize();
+    return 1;
+  }
+  std::string config = argv[1];
+
+  Args args;
+  GridCmdOptionIntVector(argv[2], args.GparityDirs);
+  assert(args.GparityDirs.size() == 3);
+
+  std::string action_s = "Mobius"; 
+  
+  for(int i=3;i<argc;i++){
+    std::string sarg = argv[i];
+    if(sarg == "--Ls"){
+      args.Ls = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set Ls to " << args.Ls << std::endl;
+    }else if(sarg == "--mass"){
+      std::istringstream ss(argv[i+1]); ss >> args.mass;
+      std::cout << GridLogMessage << "Set quark mass to " << args.mass << std::endl;
+    }else if(sarg == "--block"){
+      GridCmdOptionIntVector(argv[i+1], args.blockSize);
+      assert(args.blockSize.size() == 5);
+      std::cout << GridLogMessage << "Set block size to ";
+      for(int q=0;q<5;q++) std::cout << args.blockSize[q] << " ";
+      std::cout << std::endl;      
+    }else if(sarg == "--is_cps_cfg"){
+      args.is_cps_cfg = true;
+    }else if(sarg == "--write_irl_templ"){
+      XmlWriter writer("irl_templ.xml");
+      write(writer,"Params",args.fine);
+      Grid_finalize();
+      return 0;
+    }else if(sarg == "--read_irl_fine"){
+      std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", args.fine);
+    }else if(sarg == "--write_fine"){
+      args.write_fine = true;
+      args.write_fine_file = argv[i+1];
+    }else if(sarg == "--read_fine"){
+      args.read_fine = true;
+      args.read_fine_file = argv[i+1];
+    }else if(sarg == "--coarse_relax_tol"){
+      std::istringstream ss(argv[i+1]); ss >> args.coarse_relax_tol;
+      std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << args.coarse_relax_tol << std::endl;
+    }else if(sarg == "--action"){
+      action_s = argv[i+1];
+      std::cout << "Action set to " << action_s << std::endl;
+    }else if(sarg == "--mobius_scale"){
+      std::istringstream ss(argv[i+1]); ss >> args.mobius_scale;
+      std::cout << GridLogMessage << "Set Mobius scale to " << args.mobius_scale << std::endl;
+    }else if(sarg == "--basis_size"){
+      args.basis_size = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set basis size to " << args.basis_size << std::endl;
+    }
+  }
+  
+  //Fine grids
+  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  GridDefaultSimd(Nd,vComplex::Nsimd()),   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(args.Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(args.Ls,UGrid);
+
+  LatticeGaugeField Umu(UGrid);  
+  
+  bool is_gparity = false;
+  for(auto g : args.GparityDirs) if(g) is_gparity = true;
+
+  double bmc =  1.;      
+  double b = (args.mobius_scale + bmc)/2.;  // b = 1/2 [ (b+c) + (b-c) ]
+  double c = (args.mobius_scale - bmc)/2.;  // c = 1/2 [ (b+c) - (b-c) ]
+    
+  if(is_gparity){
+    GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs);
+    readConfiguration<ConjugateGimplD>(Umu, config, args.is_cps_cfg);   //Read the gauge field
+    
+    if(action_s == "DWF"){    
+      GparityDomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params);
+      run(action, config, args);
+    }else if(action_s == "Mobius"){
+      GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
+      run(action, config, args);	    
+    }      
+  }else{
+    WilsonImplD::ImplParams Params = setupParams();
+    readConfiguration<PeriodicGimplD>(Umu, config, args.is_cps_cfg);   //Read the gauge field
+    
+    if(action_s == "DWF"){    
+      DomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params);
+      run(action, config, args);
+    }else if(action_s == "Mobius"){
+      MobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
+      run(action, config, args);	    
+    }
+  } 
+  
+  Grid_finalize();
+}