From 33e4a0caee673c2bdebfdf5b8dce9412d52bbe5f Mon Sep 17 00:00:00 2001
From: Christopher Kelly <ckelly@phys.columbia.edu>
Date: Fri, 1 Jul 2022 14:10:59 -0400
Subject: [PATCH] Imported changes from feature/gparity_HMC branch: 	Rework
 of WilsonFlow class 		Fixed logic error in smear method where the
 step index was initialized to 1 rather than 0, resulting in the logged output
 value of tau being too large by epsilon 		Previously
 smear_adaptive would maintain the current value of tau as a class member
 variable whereas smear would compute it separately; now both methods maintain
 the current value internally and it is updated by the evolve_step routines.
 Both evolve methods are now const. 		smear_adaptive now also
 maintains the current value of epsilon internally, allowing it to be a const
 method and also allowing the same class instance to be reused without needing
 to be reset 		Replaced the fixed evaluation of the plaquette energy
 density and plaquette topological charge during the smearing with a highly
 flexible general strategy where the user can add arbitrary measurements as
 functional objects that are evaluated at an arbitrary frequency 	      
  By default the same plaquette-based measurements are performed, but
 additional example functions are provided where the smearing is performed
 with different choices of measurement that are returned as an array for
 further processing 		Added a method to compute the energy density
 using the Cloverleaf approach which has smaller discretization errors 	Added
 a new tensor utility operation, copyLane, which allows for the copying of a
 single SIMD lane between two instances of the same tensor type but
 potentially different precisions 	To LocalCoherenceLanczos, added the
 option to compute the high/low eval of the fine operator on every restart to
 aid in tuning the Chebyshev 	Added Test_field_array_io which demonstrates
 and tests a single-file write of an arbitrary array of fields 	Added
 Test_evec_compression which generates evecs using Lanczos and attempts to
 compress them using the local coherence technique 	Added
 Test_compressed_lanczos_gparity which demonstrates the local coherence
 Lanczos for G-parity BCs 	Added HMC main programs for the 40ID and 48ID
 G-parity lattices

---
 .../iterative/LocalCoherenceLanczos.h         |  19 +-
 Grid/qcd/observables/topological_charge.h     |   2 +-
 Grid/qcd/smearing/WilsonFlow.h                | 200 +++-
 Grid/tensors/Tensor_extract_merge.h           |  41 +
 HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc        | 918 ++++++++++++++++++
 HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc        | 873 +++++++++++++++++
 tests/IO/Test_field_array_io.cc               | 184 ++++
 .../Test_compressed_lanczos_gparity.cc        | 485 +++++++++
 tests/lanczos/Test_evec_compression.cc        | 582 +++++++++++
 9 files changed, 3247 insertions(+), 57 deletions(-)
 create mode 100644 HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
 create mode 100644 HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
 create mode 100644 tests/IO/Test_field_array_io.cc
 create mode 100644 tests/lanczos/Test_compressed_lanczos_gparity.cc
 create mode 100644 tests/lanczos/Test_evec_compression.cc
diff --git a/Grid/algorithms/iterative/LocalCoherenceLanczos.h b/Grid/algorithms/iterative/LocalCoherenceLanczos.h
index 0a2fe55c..344a785a 100644
--- a/Grid/algorithms/iterative/LocalCoherenceLanczos.h
+++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h
@@ -146,14 +146,21 @@ public:
   LinearOperatorBase<FineField> &_Linop;
   RealD                             _coarse_relax_tol;
   std::vector<FineField>        &_subspace;
+
+  int _largestEvalIdxForReport; //The convergence of the LCL is based on the evals of the coarse grid operator, not those of the underlying fine grid operator
+                                //As a result we do not know what the eval range of the fine operator is until the very end, making tuning the Cheby bounds very difficult
+                                //To work around this issue, every restart we separately reconstruct the fine operator eval for the lowest and highest evec and print these
+                                //out alongside the evals of the coarse operator. To do so we need to know the index of the largest eval (i.e. Nstop-1)
+                                //NOTE: If largestEvalIdxForReport=-1 (default) then this is not performed
   
   ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField>   &Poly,
 					   OperatorFunction<FineField>   &smoother,
 					   LinearOperatorBase<FineField> &Linop,
 					   std::vector<FineField>        &subspace,
-					   RealD coarse_relax_tol=5.0e3) 
+					   RealD coarse_relax_tol=5.0e3,
+					   int largestEvalIdxForReport=-1) 
     : _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
-      _coarse_relax_tol(coarse_relax_tol)  
+      _coarse_relax_tol(coarse_relax_tol), _largestEvalIdxForReport(largestEvalIdxForReport)
   {    };
 
   //evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
@@ -179,6 +186,12 @@ public:
 	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
 	     <<std::endl;
 
+    if(_largestEvalIdxForReport != -1 && (j==0 || j==_largestEvalIdxForReport)){
+      std::cout<<GridLogIRL << "Estimating true eval of fine grid operator for eval idx " << j << std::endl;
+      RealD tmp_eval;
+      ReconstructEval(j,eresid,B,tmp_eval,1.0); //don't use evalMaxApprox of coarse operator! (cf below)
+    }
+    
     int conv=0;
     if( (vv<eresid*eresid) ) conv = 1;
     return conv;
@@ -409,7 +422,7 @@ public:
     //////////////////////////////////////////////////////////////////////////////////////////////////
 
     Chebyshev<FineField>                                           ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); 
+    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax,Nstop-1); 
 
     evals_coarse.resize(Nm);
     evec_coarse.resize(Nm,_CoarseGrid);
diff --git a/Grid/qcd/observables/topological_charge.h b/Grid/qcd/observables/topological_charge.h
index 4f116496..7c09a180 100644
--- a/Grid/qcd/observables/topological_charge.h
+++ b/Grid/qcd/observables/topological_charge.h
@@ -99,7 +99,7 @@ public:
 	// using wilson flow by default here
 	WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
 	WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
-	Real T0   = WF.energyDensityPlaquette(Usmear);
+	Real T0   = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
 	std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
 		  << "T0                : [ " << traj << " ] "<< T0 << std::endl;
       }
diff --git a/Grid/qcd/smearing/WilsonFlow.h b/Grid/qcd/smearing/WilsonFlow.h
index 19fd94e2..0d1ee5d2 100644
--- a/Grid/qcd/smearing/WilsonFlow.h
+++ b/Grid/qcd/smearing/WilsonFlow.h
@@ -7,6 +7,7 @@ Source file: ./lib/qcd/modules/plaquette.h
 Copyright (C) 2017
 
 Author: Guido Cossu <guido.cossu@ed.ac.uk>
+Author: Christopher Kelly <ckelly@bnl.gov>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -33,28 +34,44 @@ NAMESPACE_BEGIN(Grid);
 
 template <class Gimpl>
 class WilsonFlow: public Smear<Gimpl>{
+public:
+  //Store generic measurements to take during smearing process using std::function
+  typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType;  //int: step,  RealD: flow time,  GaugeField : the gauge field
+  
+private:
   unsigned int Nstep;
-  unsigned int measure_interval;
-  mutable RealD epsilon, taus;
-
+  RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
+ 
+  std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
 
   mutable WilsonGaugeAction<Gimpl> SG;
 
-  void evolve_step(typename Gimpl::GaugeField&) const;
-  void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
-  RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
+  //Evolve the gauge field by 1 step and update tau
+  void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
+  //Evolve the gauge field by 1 step and update tau and the current time step eps
+  void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
 
 public:
   INHERIT_GIMPL_TYPES(Gimpl)
 
+  void resetActions(){ functions.clear(); }
+
+  void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
+
+  //Set the class to perform the default measurements: 
+  //the plaquette energy density every step
+  //the plaquette topological charge every 'topq_meas_interval' steps
+  //and output to stdout
+  void setDefaultMeasurements(int topq_meas_interval = 1);
+
   explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
   Nstep(Nstep),
     epsilon(epsilon),
-    measure_interval(interval),
     SG(WilsonGaugeAction<Gimpl>(3.0)) {
     // WilsonGaugeAction with beta 3.0
     assert(epsilon > 0.0);
     LogMessage();
+    setDefaultMeasurements(interval);
   }
 
   void LogMessage() {
@@ -73,9 +90,29 @@ public:
     // undefined for WilsonFlow
   }
 
-  void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
-  RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
-  RealD energyDensityPlaquette(const GaugeField& U) const;
+  void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
+
+  //Compute t^2 <E(t)> for time t from the plaquette
+  static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
+
+  //Compute t^2 <E(t)> for time t from the 1x1 cloverleaf form
+  //t is the Wilson flow time
+  static RealD energyDensityCloverleaf(const RealD t, const GaugeField& U);
+  
+  //Evolve the gauge field by Nstep steps of epsilon and return the energy density computed every interval steps
+  //The smeared field is output as V
+  std::vector<RealD> flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval = 1);
+
+  //Version that does not return the smeared field
+  std::vector<RealD> flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval = 1);
+
+
+  //Evolve the gauge field by Nstep steps of epsilon and return the Cloverleaf energy density computed every interval steps
+  //The smeared field is output as V
+  std::vector<RealD> flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval = 1);
+
+  //Version that does not return the smeared field
+  std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
 };
 
 
@@ -83,7 +120,7 @@ public:
 // Implementations
 ////////////////////////////////////////////////////////////////////////////////
 template <class Gimpl>
-void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
+void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
   GaugeField Z(U.Grid());
   GaugeField tmp(U.Grid());
   SG.deriv(U, Z);
@@ -99,12 +136,13 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
   SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
   Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
   Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
+  tau += epsilon;
 }
 
 template <class Gimpl>
-void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
-  if (maxTau - taus < epsilon){
-    epsilon = maxTau-taus;
+void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
+  if (maxTau - tau < eps){
+    eps = maxTau-tau;
   }
   //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
   GaugeField Z(U.Grid());
@@ -114,95 +152,151 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
   SG.deriv(U, Z);
   Zprime = -Z;
   Z *= 0.25;                                  // Z0 = 1/4 * F(U)
-  Gimpl::update_field(Z, U, -2.0*epsilon);    // U = W1 = exp(ep*Z0)*W0
+  Gimpl::update_field(Z, U, -2.0*eps);    // U = W1 = exp(ep*Z0)*W0
 
   Z *= -17.0/8.0;
   SG.deriv(U, tmp); Z += tmp;                 // -17/32*Z0 +Z1
   Zprime += 2.0*tmp;
   Z *= 8.0/9.0;                               // Z = -17/36*Z0 +8/9*Z1
-  Gimpl::update_field(Z, U, -2.0*epsilon);    // U_= W2 = exp(ep*Z)*W1
+  Gimpl::update_field(Z, U, -2.0*eps);    // U_= W2 = exp(ep*Z)*W1
     
 
   Z *= -4.0/3.0;
   SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2
   Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
-  Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2
+  Gimpl::update_field(Z, U, -2.0*eps);    // V(t+e) = exp(ep*Z)*W2
 
   // Ramos 
-  Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
+  Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
   // Compute distance as norm^2 of the difference
   GaugeField diffU = U - Uprime;
   RealD diff = norm2(diffU);
   // adjust integration step
     
-  taus += epsilon;
+  tau += eps;
   //std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
     
-  epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
+  eps = eps*0.95*std::pow(1e-4/diff,1./3.);
   //std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
 
 }
 
+
 template <class Gimpl>
-RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
-  RealD td = tau(step);
-  return 2.0 * td * td * SG.S(U)/U.Grid()->gSites();
+RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
+  static WilsonGaugeAction<Gimpl> SG(3.0);
+  return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
+}
+
+//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
+template <class Gimpl>
+RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
+  typedef typename Gimpl::GaugeLinkField GaugeMat;
+  typedef typename Gimpl::GaugeField GaugeLorentz;
+
+  assert(Nd == 4);
+  //E = 1/2 tr( F_munu F_munu )
+  //However as  F_numu = -F_munu, only need to sum the trace of the squares of the following 6 field strengths:
+  //F_01 F_02 F_03   F_12 F_13  F_23
+  GaugeMat F(U.Grid());
+  LatticeComplexD R(U.Grid());
+  R = Zero();
+  
+  for(int mu=0;mu<3;mu++){
+    for(int nu=mu+1;nu<4;nu++){
+      WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
+      R = R + trace(F*F);
+    }
+  }
+  ComplexD out = sum(R);
+  out = t*t*out / RealD(U.Grid()->gSites());
+  return -real(out); //minus sign necessary for +ve energy
+}
+
+
+template <class Gimpl>
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
+  std::vector<RealD> out;
+  resetActions();
+  addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){ 
+      std::cout << GridLogMessage << "[WilsonFlow] Computing plaquette energy density for step " << step << std::endl;
+      out.push_back( energyDensityPlaquette(t,U) );
+    });      
+  smear(V,U);
+  return out;
 }
 
 template <class Gimpl>
-RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
-  return 2.0 * taus * taus * SG.S(U)/U.Grid()->gSites();
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
+  GaugeField V(U);
+  return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
 }
 
+template <class Gimpl>
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
+  std::vector<RealD> out;
+  resetActions();
+  addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){ 
+      std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
+      out.push_back( energyDensityCloverleaf(t,U) );
+    });      
+  smear(V,U);
+  return out;
+}
+
+template <class Gimpl>
+std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
+  GaugeField V(U);
+  return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
+}
+
+
 
 //#define WF_TIMING 
-
-
-
 template <class Gimpl>
-void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
+void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
   out = in;
-  for (unsigned int step = 1; step <= Nstep; step++) {
+  RealD taus = 0.;
+  for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
     auto start = std::chrono::high_resolution_clock::now();
-    evolve_step(out);
+    evolve_step(out, taus);
     auto end = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> diff = end - start;
 #ifdef WF_TIMING
     std::cout << "Time to evolve " << diff.count() << " s\n";
 #endif
-    std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
-		  << step << "  " << tau(step) << "  " 
-	      << energyDensityPlaquette(step,out) << std::endl;
-    if( step % measure_interval == 0){
-      std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "
-		<< step << "  " 
-		<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
-    }
+    //Perform measurements
+    for(auto const &meas : functions)
+      if( step % meas.first == 0 ) meas.second(step,taus,out);
   }
 }
 
 template <class Gimpl>
-void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
+void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
   out = in;
-  taus = epsilon;
+  RealD taus = 0.;
+  RealD eps = epsilon;
   unsigned int step = 0;
   do{
     step++;
     //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
-    evolve_step_adaptive(out, maxTau);
-    std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
-		  << step << "  " << taus << "  "
-	      << energyDensityPlaquette(out) << std::endl;
-    if( step % measure_interval == 0){
-      std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "
-		<< step << "  " 
-		<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
-    }
+    evolve_step_adaptive(out, taus, eps, maxTau);
+    //Perform measurements
+    for(auto const &meas : functions)
+      if( step % meas.first == 0 ) meas.second(step,taus,out);
   } while (taus < maxTau);
-
-
-
 }
 
+template <class Gimpl>
+void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
+  addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
+      std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "  << step << "  " << t << "  " << energyDensityPlaquette(t,U) << std::endl;
+    });
+  addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
+      std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : "  << step << "  " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
+    });
+}
+
+
 NAMESPACE_END(Grid);
 
diff --git a/Grid/tensors/Tensor_extract_merge.h b/Grid/tensors/Tensor_extract_merge.h
index ea619d0f..ab14f81f 100644
--- a/Grid/tensors/Tensor_extract_merge.h
+++ b/Grid/tensors/Tensor_extract_merge.h
@@ -208,5 +208,46 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////////
+//Copy a single lane of a SIMD tensor type from one object to another
+//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
+///////////////////////////////////////////////////////////////////////////////////
+template<class vobjOut, class vobjIn>
+accelerator_inline 
+void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
+{
+  static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
+
+  typedef typename vobjOut::vector_type ovector_type;  
+  typedef typename vobjIn::vector_type ivector_type;  
+  constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
+  constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
+  static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
+
+  typedef typename vobjOut::scalar_type oscalar_type;  
+  typedef typename vobjIn::scalar_type iscalar_type;  
+  typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
+  typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
+
+  typedef oextract_type * opointer;
+  typedef iextract_type * ipointer;
+
+  constexpr int oNsimd=ovector_type::Nsimd();
+  constexpr int iNsimd=ivector_type::Nsimd();
+
+  iscalar_type itmp;
+  oscalar_type otmp;
+
+  opointer __restrict__  op = (opointer)&vecOut;
+  ipointer __restrict__  ip = (ipointer)&vecIn;
+  for(int w=0;w<owords;w++){
+    memcpy( (char*)&itmp, (char*)(ip + lane_in + iNsimd*w), sizeof(iscalar_type) );
+    otmp = itmp; //potential precision change
+    memcpy( (char*)(op + lane_out + oNsimd*w), (char*)&otmp, sizeof(oscalar_type) );
+  }
+}
+
+
 NAMESPACE_END(Grid);
 
diff --git a/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
new file mode 100644
index 00000000..6de636bd
--- /dev/null
+++ b/HMC/Mobius2p1fIDSDRGparityEOFA_40ID.cc
@@ -0,0 +1,918 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
+
+Copyright (C) 2015-2016
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+//Production binary for the 40ID G-parity ensemble
+
+struct RatQuoParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
+				  double, bnd_lo,
+				  double, bnd_hi,
+				  Integer, action_degree,
+				  double, action_tolerance,
+				  Integer, md_degree,
+				  double, md_tolerance,
+				  Integer, reliable_update_freq,
+				  Integer, bnd_check_freq);
+  RatQuoParameters() { 
+    bnd_lo = 1e-2;
+    bnd_hi = 30;
+    action_degree = 10;
+    action_tolerance = 1e-10;
+    md_degree = 10;
+    md_tolerance = 1e-8;
+    bnd_check_freq = 20;
+    reliable_update_freq = 50;
+  }
+
+  void Export(RationalActionParams &into) const{
+    into.lo = bnd_lo;
+    into.hi = bnd_hi;
+    into.action_degree = action_degree;
+    into.action_tolerance = action_tolerance;
+    into.md_degree = md_degree;
+    into.md_tolerance = md_tolerance;
+    into.BoundsCheckFreq = bnd_check_freq;
+  }
+};
+
+struct EOFAparameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
+				  OneFlavourRationalParams, rat_params,
+				  double, action_tolerance,
+				  double, action_mixcg_inner_tolerance,
+				  double, md_tolerance,
+				  double, md_mixcg_inner_tolerance);
+
+  EOFAparameters() { 
+    action_mixcg_inner_tolerance = 1e-8;
+    action_tolerance = 1e-10;
+    md_tolerance = 1e-8;
+    md_mixcg_inner_tolerance = 1e-8;
+
+    rat_params.lo = 1.0;
+    rat_params.hi = 25.0;
+    rat_params.MaxIter  = 50000;
+    rat_params.tolerance= 1.0e-9;
+    rat_params.degree   = 14;
+    rat_params.precision= 50;
+  }
+};
+
+struct EvolParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
+                                  Integer, StartTrajectory,
+                                  Integer, Trajectories,
+				  Integer, SaveInterval,
+				  Integer, Steps,
+				  RealD, TrajectoryLength,
+                                  bool, MetropolisTest,
+				  std::string, StartingType,
+				  std::vector<Integer>, GparityDirs,
+				  std::vector<EOFAparameters>, eofa_l,
+				  RatQuoParameters, rat_quo_s,
+				  RatQuoParameters, rat_quo_DSDR);
+
+  EvolParameters() {
+    //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
+    MetropolisTest    = false;
+    StartTrajectory   = 0;
+    Trajectories      = 50;
+    SaveInterval = 5;
+    StartingType      = "ColdStart";
+    GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
+    Steps = 5;
+    TrajectoryLength = 1.0;
+  }
+};
+
+bool fileExists(const std::string &fn){
+  std::ifstream f(fn);
+  return f.good();
+}
+
+
+
+
+struct LanczosParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
+				  double, alpha,
+				  double, beta,
+				  double, mu,
+				  int, ord,
+				  int, n_stop,
+				  int, n_want,
+				  int, n_use,
+				  double, tolerance);
+
+  LanczosParameters() {
+    alpha = 35;
+    beta = 5;
+    mu = 0;
+    ord = 100;
+    n_stop = 10;
+    n_want = 10;
+    n_use = 15;
+    tolerance = 1e-6;
+  }
+};
+
+
+
+template<typename FermionActionD, typename FermionFieldD>
+void computeEigenvalues(std::string param_file,
+			GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+			FermionActionD &action, GridParallelRNG &rng){
+  
+  LanczosParameters params;
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "LanczosParameters", params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "LanczosParameters", params);
+  }
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  action.ImportGauge(latt);
+
+  SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
+  PlainHermOp<FermionFieldD> hermop_wrap(hermop);
+  //ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
+  assert(params.mu == 0.0);
+
+  Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
+  FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
+
+  std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
+  ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 50000);
+
+  std::vector<RealD> eval(params.n_use);
+  std::vector<FermionFieldD> evec(params.n_use, rbGrid);
+  int Nconv;
+  IRL.calc(eval, evec, gauss_o, Nconv);
+
+  std::cout << "Eigenvalues:" << std::endl;
+  for(int i=0;i<params.n_want;i++){
+    std::cout << i << " " << eval[i] << std::endl;
+  }
+}
+
+
+//Check the quality of the RHMC approx
+//action_or_md toggles checking the action (0), MD (1) or both (2) setups
+template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
+void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+	       FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
+	       int inv_pow, const std::string &quark_descr, int action_or_md){
+  assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
+  
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  numOp.ImportGauge(latt);
+  denOp.ImportGauge(latt);
+
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
+  SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
+
+  PowerMethod<FermionFieldD> power_method;
+  RealD lambda_max;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
+
+  lambda_max = power_method(MdagM,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
+  lambda_max = power_method(VdagV,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  if(action_or_md == 0 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+
+  std::cout << "-------------------------------------------------------------------------------" << std::endl;
+
+  if(action_or_md == 1 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD); 
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+}
+
+
+template<typename FermionImplPolicy>
+void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+	       GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  RealD scale = std::sqrt(0.5);
+  gaussian(rng,eta); eta = eta * scale;
+
+  //Use the inbuilt check
+  EOFA.refresh(latt, eta);
+  EOFA.S(latt);
+  std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
+}
+
+
+template<typename FermionImplPolicy>
+class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
+  EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
+}
+
+//Applications of M^{-1} cost the same as M for EOFA!
+template<typename FermionImplPolicy>
+class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
+  EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
+}
+
+
+NAMESPACE_BEGIN(Grid);
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
+						    Integer maxinnerit, 
+						    Integer maxouterit, 
+						    GridBase* _sp_grid4, 
+						    GridBase* _sp_grid5, 
+						    FermionOperatorF &_FermOpF,
+						    FermionOperatorD &_FermOpD,
+						    SchurOperatorF   &_LinOpF,
+						    SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      InnerTolerance(tol), 
+      MaxInnerIterations(maxinnerit), 
+      MaxOuterIterations(maxouterit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.) 
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
+      MPCG.InnerTolerance = InnerTolerance;
+      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD Tolerance;
+    Integer MaxIterations;
+
+    RealD Delta; //reliable update parameter
+
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+    
+    MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol, 
+								  RealD delta,
+								  Integer maxit, 
+								  GridBase* _sp_grid4, 
+								  GridBase* _sp_grid5, 
+								  FermionOperatorF &_FermOpF,
+								  FermionOperatorD &_FermOpD,
+								  SchurOperatorF   &_LinOpF,
+								  SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      Delta(delta),
+      MaxIterations(maxit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5)
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+
+      ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
+      std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+
+NAMESPACE_END(Grid);
+
+
+
+
+
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  std::string param_file = "params.xml";
+  bool file_load_check = false;
+
+  std::string serial_seeds = "1 2 3 4 5";
+  std::string parallel_seeds = "6 7 8 9 10";
+
+  int i=1;
+  while(i < argc){
+    std::string sarg(argv[i]);
+    if(sarg == "--param_file"){
+      assert(i!=argc-1);
+      param_file = argv[i+1];
+      i+=2;
+    }else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
+      file_load_check = true;
+      i++;
+    }else if(sarg == "--set_seeds"){ //set the rng seeds. Expects two vector args, e.g.  --set_seeds 1.2.3.4 5.6.7.8
+      assert(i < argc-2);
+      std::vector<int> tmp;
+      GridCmdOptionIntVector(argv[i+1],tmp);
+      {
+	std::stringstream ss;
+	for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
+	ss << tmp.back();
+	serial_seeds = ss.str();
+      }
+      GridCmdOptionIntVector(argv[i+2],tmp);
+      {
+	std::stringstream ss;
+	for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
+	ss << tmp.back();
+	parallel_seeds = ss.str();
+      }
+      i+=3;
+      std::cout << GridLogMessage << "Set serial seeds to " << serial_seeds << std::endl;
+      std::cout << GridLogMessage << "Set parallel seeds to " << parallel_seeds << std::endl;
+      
+    }else{
+      i++;
+    }
+  }
+
+  
+  //Read the user parameters
+  EvolParameters user_params;
+  
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "Params", user_params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    {
+      Grid::XmlWriter wr(param_file + ".templ");
+      write(wr, "Params", user_params);
+    }
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+  //Check the parameters
+  if(user_params.GparityDirs.size() != Nd-1){
+    std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
+    exit(1);
+  }
+  for(int i=0;i<Nd-1;i++)
+    if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
+      std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
+      exit(1);
+    }
+
+
+  typedef GparityMobiusEOFAFermionD EOFAactionD;
+  typedef GparityMobiusFermionD FermionActionD;
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef GparityMobiusEOFAFermionF EOFAactionF;
+  typedef GparityMobiusFermionF FermionActionF;
+  typedef typename FermionActionF::Impl_t FermionImplPolicyF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  IntegratorParameters MD;
+  typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
+  MD.name    = std::string("MinimumNorm2");
+
+  // typedef ConjugateHMCRunnerD<ForceGradient> HMCWrapper;
+  // MD.name    = std::string("ForceGradient");
+  
+  MD.MDsteps = user_params.Steps;
+  MD.trajL   = user_params.TrajectoryLength;
+
+  typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
+  
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = user_params.StartTrajectory;
+  HMCparams.Trajectories     = user_params.Trajectories;
+  HMCparams.NoMetropolisUntil= 0;
+  HMCparams.StartingType     = user_params.StartingType;
+  HMCparams.MetropolisTest = user_params.MetropolisTest;
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix    = "ckpoint_rng";
+  CPparams.saveInterval  = user_params.SaveInterval;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = serial_seeds;
+  RNGpar.parallel_seeds = parallel_seeds;
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+  //aiming for ainv=1.723 GeV
+  //                                  me         bob
+  //Estimated  a(ml+mres) [40ID] = 0.001305    0.00131
+  //           a(mh+mres) [40ID] = 0.035910    0.03529
+  //Estimate Ls=12, b+c=2  mres~0.0011
+
+  //1/24/2022 initial mres measurement gives mres=0.001,  adjusted light quark mass to 0.0003 from 0.0001
+  
+  const int Ls      = 12;
+  Real beta         = 1.848;
+  Real light_mass   = 0.0003;
+  Real strange_mass = 0.0342;
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+  RealD mobius_scale = 2.; //b+c
+
+  RealD mob_bmc = 1.0;
+  RealD mob_b = (mobius_scale + mob_bmc)/2.;
+  RealD mob_c = (mobius_scale - mob_bmc)/2.;
+
+  std::cout << GridLogMessage
+	    << "Ensemble parameters:" << std::endl
+	    << "Ls=" << Ls << std::endl
+	    << "beta=" << beta << std::endl
+	    << "light_mass=" << light_mass << std::endl
+	    << "strange_mass=" << strange_mass << std::endl
+	    << "mobius_scale=" << mobius_scale << std::endl;
+  
+  //Setup the Grids
+  auto UGridD   = TheHMC.Resources.GetCartesian();
+  auto UrbGridD = TheHMC.Resources.GetRBCartesian();
+  auto FGridD     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
+  auto FrbGridD   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
+
+  GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
+
+  ConjugateIwasakiGaugeActionD GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(UGridD);
+  LatticeGaugeFieldF Uf(UGridF);
+ 
+  //Setup the BCs
+  FermionActionD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+
+  std::vector<int> dirs4(Nd);
+  for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
+  dirs4[Nd-1] = 0; //periodic gauge BC in time
+
+  GaugeImplPolicy::setDirections(dirs4); //gauge BC
+
+  //Run optional gauge field checksum checker and exit
+  if(file_load_check){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
+  ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
+  ActionLevel<HMCWrapper::Field> Level3(2); //gauge
+
+
+  /////////////////////////////////////////////////////////////
+  // Light EOFA action
+  // have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
+  /////////////////////////////////////////////////////////////
+  typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
+  typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
+  typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
+  typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
+
+
+  std::vector<RealD> eofa_light_masses = { light_mass ,  0.004,   0.016,   0.064,   0.256    };
+  std::vector<RealD> eofa_pv_masses =    { 0.004       , 0.016,   0.064,   0.256,   1.0      };
+  int n_light_hsb = 5;
+  assert(user_params.eofa_l.size() == n_light_hsb);
+  
+  EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
+
+  for(int i=0;i<n_light_hsb;i++){
+    RealD iml = eofa_light_masses[i];
+    RealD ipv = eofa_pv_masses[i];
+
+    EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+    EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+
+    EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
+    EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
+    
+    EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
+    EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
+
+#if 1
+    //Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance  as Delta for now
+    EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+    EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+#else
+    EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
+    std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
+#endif
+
+    EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
+							*LopD, *RopD, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*DerivMCG_L, *DerivMCG_R, 
+							user_params.eofa_l[i].rat_params, true);
+    EOFA_pfactions[i] = EOFA;
+    Level1.push_back(EOFA);
+  }
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
+
+  FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
+
+  RationalActionParams rat_act_params_s;
+  rat_act_params_s.inv_pow  = 4; // (M^dag M)^{1/4}
+  rat_act_params_s.precision= 60;
+  rat_act_params_s.MaxIter  = 50000;
+  user_params.rat_quo_s.Export(rat_act_params_s);
+  std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); 
+  DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); 
+  Level1.push_back(&Quotient_s);  
+
+  ///////////////////////////////////
+  // DSDR action
+  ///////////////////////////////////
+  RealD dsdr_mass=-1.8;   
+  //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
+  RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
+  RealD dsdr_epsilon_b = 0.5; 
+  GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
+  GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
+
+  GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
+  GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
+ 
+  RationalActionParams rat_act_params_DSDR;
+  rat_act_params_DSDR.inv_pow  = 2; // (M^dag M)^{1/2}
+  rat_act_params_DSDR.precision= 60;
+  rat_act_params_DSDR.MaxIter  = 50000;
+  user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
+  std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
+  Level2.push_back(&Quotient_DSDR);
+
+  /////////////////////////////////////////////////////////////
+  // Gauge action
+  /////////////////////////////////////////////////////////////
+  Level3.push_back(&GaugeAction);
+
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  TheHMC.TheAction.push_back(Level3);
+  std::cout << GridLogMessage << " Action complete "<< std::endl;
+
+
+  //Action tuning
+  bool 
+    tune_rhmc_s=false, eigenrange_s=false, 
+    tune_rhmc_DSDR=false, eigenrange_DSDR=false, 
+    check_eofa=false, 
+    upper_bound_eofa=false, lower_bound_eofa(false);
+
+  std::string lanc_params_s;
+  std::string lanc_params_DSDR;
+  int tune_rhmc_s_action_or_md;
+  int tune_rhmc_DSDR_action_or_md;
+  int eofa_which_hsb;
+
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--tune_rhmc_s"){
+      assert(i < argc-1);
+      tune_rhmc_s=true;
+      tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_s"){
+      assert(i < argc-1);
+      eigenrange_s=true;
+      lanc_params_s = argv[i+1];
+    }
+    else if(sarg == "--tune_rhmc_DSDR"){
+      assert(i < argc-1);
+      tune_rhmc_DSDR=true;
+      tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_DSDR"){
+      assert(i < argc-1);
+      eigenrange_DSDR=true;
+      lanc_params_DSDR = argv[i+1];
+    }
+    else if(sarg == "--check_eofa"){
+      assert(i < argc-1);
+      check_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
+      assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
+    }
+    else if(sarg == "--upper_bound_eofa"){
+      assert(i < argc-1);
+      upper_bound_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+    else if(sarg == "--lower_bound_eofa"){
+      assert(i < argc-1);
+      lower_bound_eofa = true;      
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+  }
+  if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
+    std::cout << GridLogMessage << "Running checks" << std::endl;
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+
+    //std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
+    //std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
+
+    if(check_eofa){
+      if(eofa_which_hsb >= 0){
+	std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+	checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+      }else{
+	for(int i=0;i<n_light_hsb;i++){
+	  std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
+	  checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	  std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
+	}
+      }
+    }	  
+    if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange",  tune_rhmc_s_action_or_md);
+    if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
+
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  //Run the HMC
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.Run();
+
+  std::cout << GridLogMessage << " Done" << std::endl;
+  Grid_finalize();
+  return 0;
+} // main
diff --git a/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
new file mode 100644
index 00000000..42f54edd
--- /dev/null
+++ b/HMC/Mobius2p1fIDSDRGparityEOFA_48ID.cc
@@ -0,0 +1,873 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
+
+Copyright (C) 2015-2016
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+//Production binary for the 40ID G-parity ensemble
+
+struct RatQuoParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
+				  double, bnd_lo,
+				  double, bnd_hi,
+				  Integer, action_degree,
+				  double, action_tolerance,
+				  Integer, md_degree,
+				  double, md_tolerance,
+				  Integer, reliable_update_freq,
+				  Integer, bnd_check_freq);
+  RatQuoParameters() { 
+    bnd_lo = 1e-2;
+    bnd_hi = 30;
+    action_degree = 10;
+    action_tolerance = 1e-10;
+    md_degree = 10;
+    md_tolerance = 1e-8;
+    bnd_check_freq = 20;
+    reliable_update_freq = 50;
+  }
+
+  void Export(RationalActionParams &into) const{
+    into.lo = bnd_lo;
+    into.hi = bnd_hi;
+    into.action_degree = action_degree;
+    into.action_tolerance = action_tolerance;
+    into.md_degree = md_degree;
+    into.md_tolerance = md_tolerance;
+    into.BoundsCheckFreq = bnd_check_freq;
+  }
+};
+
+struct EOFAparameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
+				  OneFlavourRationalParams, rat_params,
+				  double, action_tolerance,
+				  double, action_mixcg_inner_tolerance,
+				  double, md_tolerance,
+				  double, md_mixcg_inner_tolerance);
+
+  EOFAparameters() { 
+    action_mixcg_inner_tolerance = 1e-8;
+    action_tolerance = 1e-10;
+    md_tolerance = 1e-8;
+    md_mixcg_inner_tolerance = 1e-8;
+
+    rat_params.lo = 1.0;
+    rat_params.hi = 25.0;
+    rat_params.MaxIter  = 10000;
+    rat_params.tolerance= 1.0e-9;
+    rat_params.degree   = 14;
+    rat_params.precision= 50;
+  }
+};
+
+struct EvolParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
+                                  Integer, StartTrajectory,
+                                  Integer, Trajectories,
+				  Integer, SaveInterval,
+				  Integer, Steps,
+				  RealD, TrajectoryLength,
+                                  bool, MetropolisTest,
+				  std::string, StartingType,
+				  std::vector<Integer>, GparityDirs,
+				  std::vector<EOFAparameters>, eofa_l,
+				  RatQuoParameters, rat_quo_s,
+				  RatQuoParameters, rat_quo_DSDR);
+
+  EvolParameters() {
+    //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
+    MetropolisTest    = false;
+    StartTrajectory   = 0;
+    Trajectories      = 50;
+    SaveInterval = 5;
+    StartingType      = "ColdStart";
+    GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
+    Steps = 5;
+    TrajectoryLength = 1.0;
+  }
+};
+
+bool fileExists(const std::string &fn){
+  std::ifstream f(fn);
+  return f.good();
+}
+
+
+
+
+struct LanczosParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
+				  double, alpha,
+				  double, beta,
+				  double, mu,
+				  int, ord,
+				  int, n_stop,
+				  int, n_want,
+				  int, n_use,
+				  double, tolerance);
+
+  LanczosParameters() {
+    alpha = 35;
+    beta = 5;
+    mu = 0;
+    ord = 100;
+    n_stop = 10;
+    n_want = 10;
+    n_use = 15;
+    tolerance = 1e-6;
+  }
+};
+
+
+
+template<typename FermionActionD, typename FermionFieldD>
+void computeEigenvalues(std::string param_file,
+			GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+			FermionActionD &action, GridParallelRNG &rng){
+  
+  LanczosParameters params;
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "LanczosParameters", params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "LanczosParameters", params);
+  }
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  action.ImportGauge(latt);
+
+  SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
+  PlainHermOp<FermionFieldD> hermop_wrap(hermop);
+  //ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
+  assert(params.mu == 0.0);
+
+  Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
+  FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
+
+  std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
+  ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000);
+
+  std::vector<RealD> eval(params.n_use);
+  std::vector<FermionFieldD> evec(params.n_use, rbGrid);
+  int Nconv;
+  IRL.calc(eval, evec, gauss_o, Nconv);
+
+  std::cout << "Eigenvalues:" << std::endl;
+  for(int i=0;i<params.n_want;i++){
+    std::cout << i << " " << eval[i] << std::endl;
+  }
+}
+
+
+//Check the quality of the RHMC approx
+//action_or_md toggles checking the action (0), MD (1) or both (2) setups
+template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
+void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+	       FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
+	       int inv_pow, const std::string &quark_descr, int action_or_md){
+  assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
+  
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  numOp.ImportGauge(latt);
+  denOp.ImportGauge(latt);
+
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
+  SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
+
+  PowerMethod<FermionFieldD> power_method;
+  RealD lambda_max;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
+
+  lambda_max = power_method(MdagM,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
+  lambda_max = power_method(VdagV,gauss_o);
+  std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
+
+  if(action_or_md == 0 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
+    std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+
+  std::cout << "-------------------------------------------------------------------------------" << std::endl;
+
+  if(action_or_md == 1 || action_or_md == 2){
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD); 
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+    InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+    std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+    InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
+    std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  }
+}
+
+
+template<typename FermionImplPolicy>
+void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+	       GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  RealD scale = std::sqrt(0.5);
+  gaussian(rng,eta); eta = eta * scale;
+
+  //Use the inbuilt check
+  EOFA.refresh(latt, eta);
+  EOFA.S(latt);
+  std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
+}
+
+
+template<typename FermionImplPolicy>
+class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
+  EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
+}
+
+//Applications of M^{-1} cost the same as M for EOFA!
+template<typename FermionImplPolicy>
+class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
+  LatticeGaugeFieldD &U;
+public:
+  EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
+
+  typedef typename FermionImplPolicy::FermionField Field;
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); } 
+
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
+  void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
+};
+
+template<typename FermionImplPolicy>
+void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
+		    GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
+  std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
+  EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
+  typename FermionImplPolicy::FermionField eta(FGrid);
+  gaussian(rng,eta);
+  PowerMethod<typename FermionImplPolicy::FermionField> power_method;
+  auto lambda_max = power_method(linop,eta);
+  std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
+}
+
+
+NAMESPACE_BEGIN(Grid);
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
+						    Integer maxinnerit, 
+						    Integer maxouterit, 
+						    GridBase* _sp_grid4, 
+						    GridBase* _sp_grid5, 
+						    FermionOperatorF &_FermOpF,
+						    FermionOperatorD &_FermOpD,
+						    SchurOperatorF   &_LinOpF,
+						    SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      InnerTolerance(tol), 
+      MaxInnerIterations(maxinnerit), 
+      MaxOuterIterations(maxouterit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.) 
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
+      MPCG.InnerTolerance = InnerTolerance;
+      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    using OperatorFunction<FieldD>::operator();
+
+    RealD Tolerance;
+    Integer MaxIterations;
+
+    RealD Delta; //reliable update parameter
+
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+    
+    MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol, 
+								  RealD delta,
+								  Integer maxit, 
+								  GridBase* _sp_grid4, 
+								  GridBase* _sp_grid5, 
+								  FermionOperatorF &_FermOpF,
+								  FermionOperatorD &_FermOpD,
+								  SchurOperatorF   &_LinOpF,
+								  SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      Delta(delta),
+      MaxIterations(maxit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5)
+    { 
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      precisionChange(FermOpF.Umu, FermOpD.Umu);
+
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+
+      ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
+      std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+
+
+
+NAMESPACE_END(Grid);
+
+
+
+
+
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  std::string param_file = "params.xml";
+  bool file_load_check = false;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--param_file"){
+      assert(i!=argc-1);
+      param_file = argv[i+1];
+    }else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
+      file_load_check = true;
+    }
+  }
+
+  //Read the user parameters
+  EvolParameters user_params;
+  
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "Params", user_params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    {
+      Grid::XmlWriter wr(param_file + ".templ");
+      write(wr, "Params", user_params);
+    }
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+  //Check the parameters
+  if(user_params.GparityDirs.size() != Nd-1){
+    std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
+    exit(1);
+  }
+  for(int i=0;i<Nd-1;i++)
+    if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
+      std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
+      exit(1);
+    }
+
+
+  typedef GparityMobiusEOFAFermionD EOFAactionD;
+  typedef GparityMobiusFermionD FermionActionD;
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef GparityMobiusEOFAFermionF EOFAactionF;
+  typedef GparityMobiusFermionF FermionActionF;
+  typedef typename FermionActionF::Impl_t FermionImplPolicyF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  IntegratorParameters MD;
+  typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
+  typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
+  MD.name    = std::string("MinimumNorm2");
+  MD.MDsteps = user_params.Steps;
+  MD.trajL   = user_params.TrajectoryLength;
+
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = user_params.StartTrajectory;
+  HMCparams.Trajectories     = user_params.Trajectories;
+  HMCparams.NoMetropolisUntil= 0;
+  HMCparams.StartingType     = user_params.StartingType;
+  HMCparams.MetropolisTest = user_params.MetropolisTest;
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix    = "ckpoint_rng";
+  CPparams.saveInterval  = user_params.SaveInterval;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+
+  //aiming for ainv=2.068             me          Bob
+  //Estimated  a(ml+mres) [48ID] = 0.001048    0.00104 
+  //           a(mh+mres) [48ID] = 0.028847    0.02805
+  //Estimate Ls=12, b+c=2  mres~0.0003
+
+  const int Ls      = 12;
+  Real beta         = 1.946;
+  Real light_mass   = 0.00074;   //0.00104 - mres_approx;
+  Real strange_mass = 0.02775;    //0.02805 - mres_approx
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+  RealD mobius_scale = 2.; //b+c
+
+  RealD mob_bmc = 1.0;
+  RealD mob_b = (mobius_scale + mob_bmc)/2.;
+  RealD mob_c = (mobius_scale - mob_bmc)/2.;
+
+  //Setup the Grids
+  auto UGridD   = TheHMC.Resources.GetCartesian();
+  auto UrbGridD = TheHMC.Resources.GetRBCartesian();
+  auto FGridD     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
+  auto FrbGridD   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
+
+  GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
+
+  ConjugateIwasakiGaugeActionD GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(UGridD);
+  LatticeGaugeFieldF Uf(UGridF);
+ 
+  //Setup the BCs
+  FermionActionD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+
+  std::vector<int> dirs4(Nd);
+  for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
+  dirs4[Nd-1] = 0; //periodic gauge BC in time
+
+  GaugeImplPolicy::setDirections(dirs4); //gauge BC
+
+  //Run optional gauge field checksum checker and exit
+  if(file_load_check){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
+  ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
+  ActionLevel<HMCWrapper::Field> Level3(2); //gauge
+
+
+  /////////////////////////////////////////////////////////////
+  // Light EOFA action
+  // have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
+  /////////////////////////////////////////////////////////////
+  typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
+  typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
+  typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
+  typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
+  
+  std::vector<RealD> eofa_light_masses = { light_mass ,  0.004,   0.016,   0.064,   0.256    };
+  std::vector<RealD> eofa_pv_masses =    { 0.004       , 0.016,   0.064,   0.256,   1.0      };
+  int n_light_hsb = 5;
+  assert(user_params.eofa_l.size() == n_light_hsb);
+  
+  EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
+
+  for(int i=0;i<n_light_hsb;i++){
+    RealD iml = eofa_light_masses[i];
+    RealD ipv = eofa_pv_masses[i];
+
+    EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
+    EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+    EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
+
+    EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
+    EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
+    
+    EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
+    EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
+
+#if 1
+    //Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance  as Delta for now
+    EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+    EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+
+#else
+    
+    EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
+    DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
+    DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
+    
+    std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
+    std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
+#endif
+
+    
+    EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
+							*LopD, *RopD, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*ActionMCG_L, *ActionMCG_R, 
+							*DerivMCG_L, *DerivMCG_R, 
+							user_params.eofa_l[i].rat_params, true);
+    EOFA_pfactions[i] = EOFA;
+    Level1.push_back(EOFA);
+  }
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
+
+  FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
+  FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
+
+  RationalActionParams rat_act_params_s;
+  rat_act_params_s.inv_pow  = 4; // (M^dag M)^{1/4}
+  rat_act_params_s.precision= 60;
+  rat_act_params_s.MaxIter  = 10000;
+  user_params.rat_quo_s.Export(rat_act_params_s);
+  std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); 
+  DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); 
+  Level1.push_back(&Quotient_s);  
+
+  ///////////////////////////////////
+  // DSDR action
+  ///////////////////////////////////
+  RealD dsdr_mass=-1.8;   
+  //Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
+  RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
+  RealD dsdr_epsilon_b = 0.5; 
+  GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
+  GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
+
+  GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
+  GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
+ 
+  RationalActionParams rat_act_params_DSDR;
+  rat_act_params_DSDR.inv_pow  = 2; // (M^dag M)^{1/2}
+  rat_act_params_DSDR.precision= 60;
+  rat_act_params_DSDR.MaxIter  = 10000;
+  user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
+  std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
+  Level2.push_back(&Quotient_DSDR);
+
+  /////////////////////////////////////////////////////////////
+  // Gauge action
+  /////////////////////////////////////////////////////////////
+  Level3.push_back(&GaugeAction);
+
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  TheHMC.TheAction.push_back(Level3);
+  std::cout << GridLogMessage << " Action complete "<< std::endl;
+
+
+  //Action tuning
+  bool 
+    tune_rhmc_s=false, eigenrange_s=false, 
+    tune_rhmc_DSDR=false, eigenrange_DSDR=false, 
+    check_eofa=false, 
+    upper_bound_eofa=false, lower_bound_eofa(false);
+
+  std::string lanc_params_s;
+  std::string lanc_params_DSDR;
+  int tune_rhmc_s_action_or_md;
+  int tune_rhmc_DSDR_action_or_md;
+  int eofa_which_hsb;
+
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--tune_rhmc_s"){
+      assert(i < argc-1);
+      tune_rhmc_s=true;
+      tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_s"){
+      assert(i < argc-1);
+      eigenrange_s=true;
+      lanc_params_s = argv[i+1];
+    }
+    else if(sarg == "--tune_rhmc_DSDR"){
+      assert(i < argc-1);
+      tune_rhmc_DSDR=true;
+      tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
+    }
+    else if(sarg == "--eigenrange_DSDR"){
+      assert(i < argc-1);
+      eigenrange_DSDR=true;
+      lanc_params_DSDR = argv[i+1];
+    }
+    else if(sarg == "--check_eofa"){
+      assert(i < argc-1);
+      check_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
+      assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
+    }
+    else if(sarg == "--upper_bound_eofa"){
+      assert(i < argc-1);
+      upper_bound_eofa = true;
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+    else if(sarg == "--lower_bound_eofa"){
+      assert(i < argc-1);
+      lower_bound_eofa = true;      
+      eofa_which_hsb = std::stoi(argv[i+1]);
+      assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
+    }
+  }
+  if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
+    std::cout << GridLogMessage << "Running checks" << std::endl;
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+
+    //std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
+    //std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
+
+
+    if(check_eofa){
+      if(eofa_which_hsb >= 0){
+	std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+	checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
+      }else{
+	for(int i=0;i<n_light_hsb;i++){
+	  std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
+	  checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+	  std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
+	}
+      }
+    }	  
+    if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
+    if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange",  tune_rhmc_s_action_or_md);
+    if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
+
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  //Run the HMC
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.Run();
+
+  std::cout << GridLogMessage << " Done" << std::endl;
+  Grid_finalize();
+  return 0;
+} // main
diff --git a/tests/IO/Test_field_array_io.cc b/tests/IO/Test_field_array_io.cc
new file mode 100644
index 00000000..51ea7893
--- /dev/null
+++ b/tests/IO/Test_field_array_io.cc
@@ -0,0 +1,184 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/IO/Test_field_array_io.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace std;
+using namespace Grid;
+
+//This test demonstrates and checks a single-file write of an arbitrary array of fields
+
+uint64_t writeHeader(const uint32_t size, const uint32_t checksum, const std::string &format, const std::string &file){
+  std::ofstream fout(file,std::ios::out|std::ios::in);
+  fout.seekp(0,std::ios::beg);
+  fout << std::setw(10) << size << std::endl;
+  fout << std::hex << std::setw(10) << checksum << std::endl;
+  fout << format << std::endl;
+  return fout.tellp();
+}
+ 
+uint64_t readHeader(uint32_t &size, uint32_t &checksum, std::string &format, const std::string &file){
+  std::ifstream fin(file);
+  std::string line;
+  getline(fin,line);
+  {
+    std::stringstream ss; ss <<line ; ss >> size;
+  }
+  getline(fin,line);
+  {
+    std::stringstream ss; ss <<line ; ss >> std::hex >> checksum;
+  }
+  getline(fin,format);
+  removeWhitespace(format);
+      
+  return fin.tellg();
+}
+ 
+template<typename FieldType>
+void writeFieldArray(const std::string &file, const std::vector<FieldType> &data){
+  typedef typename FieldType::vector_object vobj;
+  typedef typename FieldType::scalar_object sobj;
+  GridBase* grid = data[0].Grid(); //assume all fields have the same Grid
+  BinarySimpleMunger<sobj, sobj> munge; //straight copy
+
+  //We need a 2-pass header write, first to establish the size, the second pass writes the checksum
+  std::string format = getFormatString<typename FieldType::vector_object>();
+
+  uint64_t offset; //leave 64 bits for header
+  if ( grid->IsBoss() ) { 
+    NerscIO::truncate(file);
+    offset = writeHeader(data.size(), 0, format, file);
+  }
+  grid->Broadcast(0,(void *)&offset,sizeof(offset)); //use as a barrier
+
+  std::cout << "Data offset write " << offset << std::endl;
+  std::cout << "Data size write " << data.size() << std::endl;
+  uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj);
+  std::cout << "Field size = " << field_size << " B" << std::endl;
+
+  uint32_t checksum = 0;
+  for(int i=0;i<data.size();i++){
+    std::cout << "Data field write " << i << " offset " << offset << std::endl;
+    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
+    BinaryIO::writeLatticeObject<vobj,sobj>(const_cast<FieldType &>(data[i]),file,munge,offset,format,
+					    nersc_csum,scidac_csuma,scidac_csumb);
+    offset += field_size;
+    checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2);
+  }
+  std::cout << "Write checksum " << checksum << std::endl;
+
+  if ( grid->IsBoss() ) { 
+    writeHeader(data.size(), checksum, format, file);
+  }
+}
+
+
+template<typename FieldType>
+void readFieldArray(std::vector<FieldType> &data, const std::string &file){
+  typedef typename FieldType::vector_object vobj;
+  typedef typename FieldType::scalar_object sobj;
+  assert(data.size() > 0);
+  GridBase* grid = data[0].Grid(); //assume all fields have the same Grid
+  BinarySimpleUnmunger<sobj, sobj> munge; //straight copy
+  
+  uint32_t hdr_checksum, hdr_size;
+  std::string format;
+  uint64_t offset = readHeader(hdr_size, hdr_checksum, format, file);
+  
+  std::cout << "Data offset read " << offset << std::endl;  
+  std::cout << "Data size read " << hdr_size << std::endl;
+  assert(data.size() == hdr_size);
+
+  uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj);
+
+  uint32_t checksum = 0;
+
+  for(int i=0;i<data.size();i++){
+    std::cout << "Data field read " << i << " offset " << offset << std::endl;
+    uint32_t nersc_csum,scidac_csuma,scidac_csumb;
+    BinaryIO::readLatticeObject<vobj,sobj>(data[i],file,munge,offset,format,
+					   nersc_csum,scidac_csuma,scidac_csumb);
+    offset += field_size;
+    checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2);
+  }
+
+  std::cout << "Header checksum " << hdr_checksum << std::endl;    
+  std::cout << "Read checksum " << checksum << std::endl;
+    
+
+  assert( hdr_checksum == checksum );
+}
+
+
+
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  Coordinate latt   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+
+  const int Ls=8;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(latt, simd_layout, mpi_layout);
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  typedef DomainWallFermionD::FermionField FermionField;
+
+  int nfield = 20;
+  std::vector<FermionField> data(nfield, FGrid);
+
+  for(int i=0;i<data.size();i++)
+    gaussian(RNG5, data[i]);
+  
+  std::string file = "test_field_array_io.0";
+  writeFieldArray(file, data);
+
+  std::vector<FermionField> data_r(nfield, FGrid);
+  readFieldArray(data_r, file);
+  
+  for(int i=0;i<nfield;i++){
+    FermionField diff = data_r[i] - data[i];
+    RealD norm_diff = norm2(diff);
+    std::cout << "Norm2 of difference between stored and loaded data index " << i << " : " << norm_diff << std::endl;
+  }
+  
+  std::cout << "Done" << std::endl;
+
+  Grid_finalize();
+}
diff --git a/tests/lanczos/Test_compressed_lanczos_gparity.cc b/tests/lanczos/Test_compressed_lanczos_gparity.cc
new file mode 100644
index 00000000..ca353d61
--- /dev/null
+++ b/tests/lanczos/Test_compressed_lanczos_gparity.cc
@@ -0,0 +1,485 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_compressed_lanczos_gparity.cc
+
+    Copyright (C) 2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Leans heavily on Christoph Lehner's code
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+/*
+ *  Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features 
+ *  in Grid that were intended to be used to support blocked Aggregates, from
+ */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
+
+using namespace std;
+using namespace Grid;
+
+//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata
+void readConfiguration(LatticeGaugeFieldD &U,
+		       const std::string &config,
+		       bool is_cps_cfg = false){
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+  typedef GaugeStatistics<ConjugateGimplD> GaugeStats;
+     
+  FieldMetaData header;
+  NerscIO::readConfiguration<GaugeStats>(U, header, config);
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
+}
+
+//Lanczos parameters in CPS conventions
+struct CPSLanczosParams : Serializable {
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, ch_ord,
+				  int, N_use,
+				  int, N_get,
+				  int, N_true_get,
+				  RealD, stop_rsd,
+				  int, maxits);
+
+  //Translations
+  ChebyParams getChebyParams() const{
+    ChebyParams out;
+    out.alpha = beta*beta; //aka lo
+    out.beta = alpha*alpha; //aka hi
+    out.Npoly = ch_ord+1;
+    return out;
+  }
+  int Nstop() const{ return N_true_get; }
+  int Nm() const{ return N_use; }
+  int Nk() const{ return N_get; }
+};
+
+//Maybe this class should be in the main library?
+template<class Fobj,class CComplex,int nbasis>
+class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
+{ 
+public:
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
+  typedef Lattice<Fobj>          FineField;
+
+  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
+			      LinearOperatorBase<FineField> &FineOp,
+			      int checkerboard) 
+    // Base constructor
+    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard) 
+  {};
+
+  void checkpointFine(std::string evecs_file,std::string evals_file)
+  {
+    assert(this->subspace.size()==nbasis);
+    emptyUserRecord record;
+    Grid::ScidacWriter WR(this->_FineGrid->IsBoss());
+    WR.open(evecs_file);
+    for(int k=0;k<nbasis;k++) {
+      WR.writeScidacFieldRecord(this->subspace[k],record);
+    }
+    WR.close();
+    
+    XmlWriter WRx(evals_file);
+    write(WRx,"evals",this->evals_fine);
+  }
+
+  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
+  {
+    this->evals_fine.resize(nbasis);
+    this->subspace.resize(nbasis,this->_FineGrid);
+    
+    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",this->evals_fine);
+
+    if(this->evals_fine.size() < nbasis) assert(0 && "Not enough fine evals to complete basis");
+    if(this->evals_fine.size() > nbasis){ //allow the use of precomputed evecs with a larger #evecs
+      std::cout << GridLogMessage << "Truncating " << this->evals_fine.size() << " evals to basis size " << nbasis << std::endl;
+      this->evals_fine.resize(nbasis);
+    }     
+    
+    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
+    emptyUserRecord record;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<nbasis;k++) {
+      this->subspace[k].Checkerboard()=this->_checkerboard;
+      RD.readScidacFieldRecord(this->subspace[k],record);
+      
+    }
+    RD.close();
+  }
+
+  void checkpointCoarse(std::string evecs_file,std::string evals_file)
+  {
+    int n = this->evec_coarse.size();
+    emptyUserRecord record;
+    Grid::ScidacWriter WR(this->_CoarseGrid->IsBoss());
+    WR.open(evecs_file);
+    for(int k=0;k<n;k++) {
+      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
+    }
+    WR.close();
+    
+    XmlWriter WRx(evals_file);
+    write(WRx,"evals",this->evals_coarse);
+  }
+
+  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
+  {
+    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
+    this->evals_coarse.resize(nvec);
+    this->evec_coarse.resize(nvec,this->_CoarseGrid);
+    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",this->evals_coarse);
+
+    assert(this->evals_coarse.size()==nvec);
+    emptyUserRecord record;
+    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<nvec;k++) {
+      RD.readScidacFieldRecord(this->evec_coarse[k],record);
+    }
+    RD.close();
+  }
+};
+
+struct Options{
+  std::vector<int> blockSize;
+  std::vector<int> GparityDirs;
+  int Ls;
+  RealD mass;
+  RealD M5;
+  RealD mobius_scale;
+  std::string config;
+  bool is_cps_cfg;
+
+  double coarse_relax_tol;
+  int smoother_ord;
+  
+  CPSLanczosParams fine;
+  CPSLanczosParams coarse;
+
+  bool write_fine = false;
+  std::string write_fine_file;
+
+  bool read_fine = false;
+  std::string read_fine_file;
+
+  bool write_coarse = false;
+  std::string write_coarse_file;
+
+  bool read_coarse = false;
+  std::string read_coarse_file;
+
+  
+  Options(){
+    blockSize = std::vector<int> ({2,2,2,2,2});
+    GparityDirs = std::vector<int> ({1,1,1}); //1 for each GP direction
+    
+    Ls = 12;
+    mass = 0.01;
+    M5 = 1.8;
+    is_cps_cfg = false;
+    mobius_scale = 2.0;
+    
+    fine.alpha = 2;
+    fine.beta = 0.1;
+    fine.ch_ord = 100;
+    fine.N_use = 70;
+    fine.N_get = 60;
+    fine.N_true_get = 60;
+    fine.stop_rsd = 1e-8;
+    fine.maxits = 10000;
+
+    coarse.alpha = 2;
+    coarse.beta = 0.1;
+    coarse.ch_ord = 100;
+    coarse.N_use = 200;
+    coarse.N_get = 190;
+    coarse.N_true_get = 190;
+    coarse.stop_rsd = 1e-8;
+    coarse.maxits = 10000;
+
+    coarse_relax_tol = 1e5;
+    smoother_ord = 20;
+
+    write_fine = false;
+    read_fine = false;
+    write_coarse = false;
+    read_coarse = false;
+  }
+};  
+
+template<int nbasis>
+void runTest(const Options &opt){
+	        //Fine grids
+  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  GridDefaultSimd(Nd,vComplex::Nsimd()),   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(opt.Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(opt.Ls,UGrid);
+
+  //Setup G-parity BCs
+  assert(Nd == 4);
+  std::vector<int> dirs4(4);
+  for(int i=0;i<3;i++) dirs4[i] = opt.GparityDirs[i];
+  dirs4[3] = 0; //periodic gauge BC in time
+  
+  std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
+  ConjugateGimplD::setDirections(dirs4); //gauge BC
+
+  GparityWilsonImplD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = opt.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+  std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
+  
+  //Read the gauge field
+  LatticeGaugeField Umu(UGrid);  
+  readConfiguration(Umu, opt.config, opt.is_cps_cfg);
+
+  //Setup the coarse grids  
+  auto fineLatt     = GridDefaultLatt();
+  Coordinate coarseLatt(4);
+  for (int d=0;d<4;d++){
+    coarseLatt[d] = fineLatt[d]/opt.blockSize[d];    assert(coarseLatt[d]*opt.blockSize[d]==fineLatt[d]);
+  }
+
+  std::cout << GridLogMessage<< " 5d coarse lattice is ";
+  for (int i=0;i<4;i++){
+    std::cout << coarseLatt[i]<<"x";
+  } 
+  int cLs = opt.Ls/opt.blockSize[4]; assert(cLs*opt.blockSize[4]==opt.Ls);
+  std::cout << cLs<<std::endl;
+  
+  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
+  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
+
+  //Dirac operator
+  double bmc =  1.;      
+  double b = (opt.mobius_scale + bmc)/2.;  // b = 1/2 [ (b+c) + (b-c) ]
+  double c = (opt.mobius_scale - bmc)/2.;  // c = 1/2 [ (b+c) - (b-c) ]
+  
+  GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, opt.mass, opt.M5, b,c,Params);
+  typedef GparityMobiusFermionD::FermionField FermionField;
+  
+  SchurDiagTwoOperator<GparityMobiusFermionD, FermionField> SchurOp(action);
+
+  typedef GparityWilsonImplD::SiteSpinor SiteSpinor;
+
+  const CPSLanczosParams &fine = opt.fine;
+  const CPSLanczosParams &coarse = opt.coarse;
+
+  std::cout << GridLogMessage << "Keep " << fine.N_true_get   << " fine   vectors" << std::endl;
+  std::cout << GridLogMessage << "Keep " << coarse.N_true_get << " coarse vectors" << std::endl;
+  assert(coarse.N_true_get >= fine.N_true_get);
+
+  assert(nbasis<=fine.N_true_get);
+  LocalCoherenceLanczosScidac<SiteSpinor,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,SchurOp,Odd);
+  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
+ 
+  //Compute and/or read fine evecs
+  if(opt.read_fine){
+    _LocalCoherenceLanczos.checkpointFineRestore(opt.read_fine_file + "_evecs.scidac", opt.read_fine_file + "_evals.xml");
+  }else{
+    std::cout << GridLogMessage << "Performing fine grid IRL" << std::endl;
+    std::cout << GridLogMessage << "Using Chebyshev alpha=" << fine.alpha << " beta=" << fine.beta << " ord=" << fine.ch_ord << std::endl;
+    _LocalCoherenceLanczos.calcFine(fine.getChebyParams(),
+				    fine.Nstop(),fine.Nk(),fine.Nm(),
+				    fine.stop_rsd,fine.maxits,0,0);
+    if(opt.write_fine){
+      std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
+      _LocalCoherenceLanczos.checkpointFine(opt.write_fine_file + "_evecs.scidac", opt.write_fine_file + "_evals.xml");
+    }
+  }
+  
+  //Block orthonormalise (this should be part of calcFine?)
+  std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
+  _LocalCoherenceLanczos.Orthogonalise();
+  std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
+
+  ChebyParams smoother = fine.getChebyParams();
+  smoother.Npoly = opt.smoother_ord+1;
+
+  if(opt.read_coarse){
+    _LocalCoherenceLanczos.checkpointCoarseRestore(opt.read_coarse_file + "_evecs.scidac", opt.read_coarse_file + "_evals.xml",coarse.Nstop());
+
+  }else{
+    std::cout << GridLogMessage << "Performing coarse grid IRL" << std::endl;
+    std::cout << GridLogMessage << "Using Chebyshev alpha=" << coarse.alpha << " beta=" << coarse.beta << " ord=" << coarse.ch_ord << std::endl;	
+    _LocalCoherenceLanczos.calcCoarse(coarse.getChebyParams(), smoother, opt.coarse_relax_tol,
+				      coarse.Nstop(), coarse.Nk() ,coarse.Nm(),
+				      coarse.stop_rsd, coarse.maxits, 
+				      0,0);
+
+    if(opt.write_coarse){
+      std::cout << GridLogIRL<<"Checkpointing Coarse evecs"<<std::endl;
+      _LocalCoherenceLanczos.checkpointCoarse(opt.write_coarse_file + "_evecs.scidac", opt.write_coarse_file + "_evals.xml");
+    }
+
+  }
+
+  //Test the eigenvectors
+  //To remove high-frequency noise we apply a Chebyshev smoothing
+  Chebyshev<FermionField> cheb_smoother(smoother);
+    
+  FermionField evec(FrbGrid);
+  FermionField evec_sm(FrbGrid); //smoothed
+  FermionField tmp(FrbGrid);
+  RealD eval;
+  
+  for(int i=0;i<coarse.N_true_get;i++){    
+    _LocalCoherenceLanczos.getFineEvecEval(evec, eval, i);
+
+    //Check unsmoothed evec
+    SchurOp.HermOp(evec, tmp);
+    tmp = tmp - eval*evec;
+    RealD norm_unsmoothed = sqrt(norm2(tmp));
+    
+    //Check smoothed evec
+    cheb_smoother(SchurOp, evec, evec_sm);   
+    SchurOp.HermOp(evec_sm, tmp);
+    tmp = tmp - eval*evec_sm;
+    RealD norm_smoothed = sqrt(norm2(tmp));
+    
+    std::cout << GridLogMessage << "Eval " << eval << " unsmoothed resid " << norm_unsmoothed << " smoothed resid " << norm_smoothed << std::endl;
+  }
+}
+
+
+//Note:  because we rely upon physical properties we must use a "real" gauge configuration
+int main (int argc, char ** argv) {
+  Grid_init(&argc,&argv);
+  GridLogIRL.TimingMode(1);
+
+  Options opt;
+  int basis_size = 100;
+  
+  if(argc < 3){
+    std::cout << GridLogMessage << "Usage: <exe> <config> <gparity dirs> <options>" << std::endl;
+    std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
+    std::cout << GridLogMessage << "Options:" << std::endl;
+    std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
+    std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
+    std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents  (default 2.2.2.2.2)" << std::endl;
+    std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
+    std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_coarse <filename>: Real the parameters file for the coarse Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--write_coarse <filename stub>: Write coarse evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_coarse <filename stub>: Read coarse evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--smoother_ord :  Set the Chebyshev order of the smoother (default 20)" << std::endl;
+    std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
+    std::cout << GridLogMessage << "--basis_size : Select the basis size from 100,200,300,350 (default 100)" << std::endl;
+    Grid_finalize();
+    return 1;
+  }
+  opt.config = argv[1];
+  GridCmdOptionIntVector(argv[2], opt.GparityDirs);
+  assert(opt.GparityDirs.size() == 3);
+
+  for(int i=3;i<argc;i++){
+    std::string sarg = argv[i];
+    if(sarg == "--Ls"){
+      opt.Ls = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set Ls to " << opt.Ls << std::endl;
+    }else if(sarg == "--mass"){
+      std::istringstream ss(argv[i+1]); ss >> opt.mass;
+      std::cout << GridLogMessage << "Set quark mass to " << opt.mass << std::endl;
+    }else if(sarg == "--block"){
+      GridCmdOptionIntVector(argv[i+1], opt.blockSize);
+      assert(opt.blockSize.size() == 5);
+      std::cout << GridLogMessage << "Set block size to ";
+      for(int q=0;q<5;q++) std::cout << opt.blockSize[q] << " ";
+      std::cout << std::endl;      
+    }else if(sarg == "--is_cps_cfg"){
+      opt.is_cps_cfg = true;
+    }else if(sarg == "--write_irl_templ"){
+      XmlWriter writer("irl_templ.xml");
+      write(writer,"Params", opt.fine);
+      Grid_finalize();
+      return 0;
+    }else if(sarg == "--read_irl_fine"){
+      std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", opt.fine);
+    }else if(sarg == "--read_irl_coarse"){
+      std::cout << GridLogMessage << "Reading coarse IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", opt.coarse);
+    }else if(sarg == "--write_fine"){
+      opt.write_fine = true;
+      opt.write_fine_file = argv[i+1];
+    }else if(sarg == "--read_fine"){
+      opt.read_fine = true;
+      opt.read_fine_file = argv[i+1];
+    }else if(sarg == "--write_coarse"){
+      opt.write_coarse = true;
+      opt.write_coarse_file = argv[i+1];
+    }else if(sarg == "--read_coarse"){
+      opt.read_coarse = true;
+      opt.read_coarse_file = argv[i+1];
+    }else if(sarg == "--smoother_ord"){
+      std::istringstream ss(argv[i+1]); ss >> opt.smoother_ord;
+      std::cout << GridLogMessage << "Set smoother order to " << opt.smoother_ord << std::endl;
+    }else if(sarg == "--coarse_relax_tol"){
+      std::istringstream ss(argv[i+1]); ss >> opt.coarse_relax_tol;
+      std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << opt.coarse_relax_tol << std::endl;
+    }else if(sarg == "--mobius_scale"){
+      std::istringstream ss(argv[i+1]); ss >> opt.mobius_scale;
+      std::cout << GridLogMessage << "Set Mobius scale to " << opt.mobius_scale << std::endl;
+    }else if(sarg == "--basis_size"){
+      basis_size = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set basis size to " << basis_size << std::endl;
+    }
+  }
+
+  switch(basis_size){
+  case 100:
+    runTest<100>(opt); break;
+  case 200:
+    runTest<200>(opt); break;
+  case 300:
+    runTest<300>(opt); break;
+  case 350:
+    runTest<350>(opt); break;
+  default:
+    std::cout << GridLogMessage << "Unsupported basis size " << basis_size << std::endl;
+    assert(0);
+  }
+  
+  Grid_finalize();
+}
+
diff --git a/tests/lanczos/Test_evec_compression.cc b/tests/lanczos/Test_evec_compression.cc
new file mode 100644
index 00000000..1636ea3a
--- /dev/null
+++ b/tests/lanczos/Test_evec_compression.cc
@@ -0,0 +1,582 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_evec_compression.cc
+
+    Copyright (C) 2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+/*
+ *
+ * This test generates eigenvectors using the Lanczos algorithm then attempts to use local coherence compression
+ * to express those vectors in terms of a basis formed from a subset. This test is useful for finding the optimal
+ * blocking and basis size for performing a Local Coherence Lanczos
+ */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
+
+using namespace std;
+using namespace Grid;
+
+//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata
+template<typename Gimpl>
+void readConfiguration(LatticeGaugeFieldD &U,
+		       const std::string &config,
+		       bool is_cps_cfg = false){
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+  typedef GaugeStatistics<Gimpl> GaugeStats;
+     
+  FieldMetaData header;
+  NerscIO::readConfiguration<GaugeStats>(U, header, config);
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
+}
+
+//Lanczos parameters in CPS conventions
+struct CPSLanczosParams : Serializable {
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, ch_ord,
+				  int, N_use,
+				  int, N_get,
+				  int, N_true_get,
+				  RealD, stop_rsd,
+				  int, maxits);
+
+  //Translations
+  ChebyParams getChebyParams() const{
+    ChebyParams out;
+    out.alpha = beta*beta; //aka lo
+    out.beta = alpha*alpha; //aka hi
+    out.Npoly = ch_ord+1;
+    return out;
+  }
+  int Nstop() const{ return N_true_get; }
+  int Nm() const{ return N_use; }
+  int Nk() const{ return N_get; }
+};
+
+
+template<class Fobj,class CComplex,int nbasis>
+class LocalCoherenceCompressor{
+public:
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<Fobj>                       FineField;
+  
+  void compress(std::vector<FineField> &basis,
+		std::vector<CoarseField> &compressed_evecs,
+		const std::vector<FineField> &evecs_in,
+		GridBase *FineGrid,
+		GridBase *CoarseGrid){
+    int nevecs = evecs_in.size();
+    assert(nevecs > nbasis);
+    
+    //Construct the basis
+    basis.resize(nbasis, FineGrid);
+    for(int b=0;b<nbasis;b++) basis[b] = evecs_in[b];
+
+    //Block othornormalize basis
+    CoarseScalar InnerProd(CoarseGrid);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
+    blockOrthogonalise(InnerProd,basis);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
+    blockOrthogonalise(InnerProd,basis);
+
+    //The coarse grid representation is the field of vectors of block inner products
+    std::cout << GridLogMessage << "Compressing eigevectors" << std::endl;
+    compressed_evecs.resize(nevecs, CoarseGrid);
+    for(int i=0;i<nevecs;i++) blockProject(compressed_evecs[i], evecs_in[i], basis);
+    std::cout << GridLogMessage << "Compression complete" << std::endl;
+  }
+
+  void uncompress(FineField &evec, const int i, const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs) const{
+    blockPromote(compressed_evecs[i],evec,basis);  
+  }
+
+  //Test uncompressed eigenvectors of Linop.HermOp to precision 'base_tolerance' for i<nbasis and 'base_tolerance*relax' for i>=nbasis
+  //Because the uncompressed evec has a lot of high mode noise (unimportant for deflation) we apply a smoother before testing.
+  //The Chebyshev used by the Lanczos should be sufficient as a smoother
+  bool testCompression(LinearOperatorBase<FineField> &Linop, OperatorFunction<FineField>   &smoother,
+		       const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<RealD> &evals,
+		       const RealD base_tolerance, const RealD relax){
+    std::cout << GridLogMessage << "Testing quality of uncompressed evecs (after smoothing)" << std::endl;
+   
+    GridBase* FineGrid = basis[0].Grid();
+    GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+    bool fail = false;
+    FineField evec(FineGrid), Mevec(FineGrid), evec_sm(FineGrid);
+    for(int i=0;i<compressed_evecs.size();i++){
+      std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
+      uncompress(evec, i, basis, compressed_evecs);
+
+      std::cout << GridLogMessage << "Smoothing evec " << i << std::endl;
+      smoother(Linop, evec, evec_sm);
+      
+      std::cout << GridLogMessage << "Computing residual for evec " << i << std::endl;
+      std::cout << GridLogMessage << "Linop" << std::endl;
+      Linop.HermOp(evec_sm, Mevec);
+      std::cout << GridLogMessage << "Linalg" << std::endl;
+      Mevec = Mevec - evals[i]*evec_sm;
+
+      std::cout << GridLogMessage << "Resid" << std::endl;
+      RealD tol = base_tolerance * (i<nbasis ? 1. : relax);
+      RealD res = sqrt(norm2(Mevec));
+      std::cout << GridLogMessage << "Evec idx " << i << " res " << res << " tol " << tol << std::endl;
+      if(res > tol) fail = true;
+    }
+    return fail;
+  }
+
+  //Compare uncompressed evecs to original evecs
+  void compareEvecs(const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<FineField> &orig_evecs){
+    std::cout << GridLogMessage << "Comparing uncompressed evecs to original evecs" << std::endl;
+    
+    GridBase* FineGrid = basis[0].Grid();
+    GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+    FineField evec(FineGrid), diff(FineGrid);
+    for(int i=0;i<compressed_evecs.size();i++){
+      std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
+      uncompress(evec, i, basis, compressed_evecs);
+      diff = orig_evecs[i] - evec;
+      RealD res = sqrt(norm2(diff));
+      std::cout << GridLogMessage << "Evec idx " << i << " res " << res << std::endl;
+    }
+  }
+  
+};
+
+template<class Fobj,class CComplex,int nbasis>
+void compareBlockPromoteTimings(const std::vector<Lattice<Fobj> > &basis, const std::vector<Lattice<iVector<CComplex,nbasis > > > &compressed_evecs){
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar; 
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<Fobj>                       FineField;
+
+  GridStopWatch timer;
+  
+  GridBase* FineGrid = basis[0].Grid();
+  GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+  FineField v1(FineGrid), v2(FineGrid);
+
+  //Start with a cold start
+  for(int i=0;i<basis.size();i++){
+    autoView( b_ , basis[i], CpuWrite);
+  }
+  for(int i=0;i<compressed_evecs.size();i++){
+    autoView( b_ , compressed_evecs[i], CpuWrite);
+  }
+  {
+    autoView( b_, v1, CpuWrite );
+  }
+
+  timer.Start();
+  blockPromote(compressed_evecs[0],v1,basis);  
+  timer.Stop();
+  std::cout << GridLogMessage << "Time for cold blockPromote v1 " << timer.Elapsed() << std::endl;
+
+  //Test to ensure it is actually doing a cold start by repeating
+  for(int i=0;i<basis.size();i++){
+    autoView( b_ , basis[i], CpuWrite);
+  }
+  for(int i=0;i<compressed_evecs.size();i++){
+    autoView( b_ , compressed_evecs[i], CpuWrite);
+  }
+  {
+    autoView( b_, v1, CpuWrite );
+  }
+
+  timer.Reset();
+  timer.Start();
+  blockPromote(compressed_evecs[0],v1,basis);  
+  timer.Stop();
+  std::cout << GridLogMessage << "Time for cold blockPromote v1 repeat (should be the same as above) " << timer.Elapsed() << std::endl;
+}
+
+struct Args{
+  int Ls;
+  RealD mass;
+  RealD M5;
+  bool is_cps_cfg;
+  RealD mobius_scale; //b+c
+  
+  CPSLanczosParams fine;
+  double coarse_relax_tol;
+
+  std::vector<int> blockSize;
+  std::vector<int> GparityDirs;
+
+  bool write_fine;
+  std::string write_fine_file;
+  bool read_fine;
+  std::string read_fine_file;
+
+  int basis_size;
+  
+  Args(){
+    blockSize = {2,2,2,2,2};
+    GparityDirs = {1,1,1}; //1 for each GP direction
+    
+    Ls = 12;
+    mass = 0.01;
+    M5 = 1.8;
+    is_cps_cfg = false;
+    mobius_scale = 2;
+    
+    fine.alpha = 2;
+    fine.beta = 0.1;
+    fine.ch_ord = 100;
+    fine.N_use = 70;
+    fine.N_get = 60;
+    fine.N_true_get = 60;
+    fine.stop_rsd = 1e-8;
+    fine.maxits = 10000;
+
+    coarse_relax_tol = 1e5;
+
+    write_fine = false;
+    read_fine = false;
+
+    basis_size = 100;
+  }
+};
+    
+
+GparityWilsonImplD::ImplParams setupGparityParams(const std::vector<int> &GparityDirs){
+  //Setup G-parity BCs
+  assert(Nd == 4);
+  std::vector<int> dirs4(4);
+  for(int i=0;i<3;i++) dirs4[i] = GparityDirs[i];
+  dirs4[3] = 0; //periodic gauge BC in time
+  
+  std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
+  ConjugateGimplD::setDirections(dirs4); //gauge BC
+
+  GparityWilsonImplD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+  std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
+  return Params;
+}
+
+WilsonImplD::ImplParams setupParams(){
+  WilsonImplD::ImplParams Params;
+  Complex one(1.0);
+  Complex mone(-1.0);
+  for(int i=0;i<Nd-1;i++) Params.boundary_phases[i] = one;
+  Params.boundary_phases[Nd-1] = mone;
+  return Params;
+}
+
+template<int nbasis, typename ActionType>
+void run_b(ActionType &action, const std::string &config, const Args &args){
+  //Fine grids
+  GridCartesian         * UGrid     = (GridCartesian*)action.GaugeGrid();
+  GridRedBlackCartesian * UrbGrid   = (GridRedBlackCartesian*)action.GaugeRedBlackGrid();
+  GridCartesian         * FGrid     = (GridCartesian*)action.FermionGrid();
+  GridRedBlackCartesian * FrbGrid   = (GridRedBlackCartesian*)action.FermionRedBlackGrid();
+
+  //Setup the coarse grids  
+  auto fineLatt     = GridDefaultLatt();
+  Coordinate coarseLatt(4);
+  for (int d=0;d<4;d++){
+    coarseLatt[d] = fineLatt[d]/args.blockSize[d];    assert(coarseLatt[d]*args.blockSize[d]==fineLatt[d]);
+  }
+
+  std::cout << GridLogMessage<< " 5d coarse lattice is ";
+  for (int i=0;i<4;i++){
+    std::cout << coarseLatt[i]<<"x";
+  } 
+  int cLs = args.Ls/args.blockSize[4]; assert(cLs*args.blockSize[4]==args.Ls);
+  std::cout << cLs<<std::endl;
+  
+  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
+  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
+  typedef vTComplex CComplex; 
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar;
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+
+  typedef typename ActionType::FermionField FermionField; 
+  
+  SchurDiagTwoOperator<ActionType,FermionField> SchurOp(action);
+
+  typedef typename ActionType::SiteSpinor SiteSpinor;
+
+  const CPSLanczosParams &fine = args.fine;
+  
+  //Do the fine Lanczos
+  std::vector<RealD> evals;
+  std::vector<FermionField> evecs;
+
+  if(args.read_fine){
+    evals.resize(fine.N_true_get);
+    evecs.resize(fine.N_true_get, FrbGrid);
+
+    std::string evals_file = args.read_fine_file + "_evals.xml";
+    std::string evecs_file = args.read_fine_file + "_evecs.scidac";
+    
+    std::cout << GridLogIRL<< "Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",evals);
+    
+    assert(evals.size()==fine.N_true_get);
+    
+    std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
+    emptyUserRecord record;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<fine.N_true_get;k++) {
+      evecs[k].Checkerboard()=Odd;
+      RD.readScidacFieldRecord(evecs[k],record);
+      
+    }
+    RD.close();
+  }else{ 
+    int Nstop = fine.Nstop(); //==N_true_get
+    int Nm = fine.Nm();
+    int Nk = fine.Nk();
+    RealD resid = fine.stop_rsd;
+    int MaxIt = fine.maxits;
+    
+    assert(nbasis<=Nm);    
+    Chebyshev<FermionField>      Cheby(fine.getChebyParams());
+    FunctionHermOp<FermionField> ChebyOp(Cheby,SchurOp);
+    PlainHermOp<FermionField>    Op(SchurOp);
+
+    evals.resize(Nm);
+    evecs.resize(Nm,FrbGrid);
+    
+    ImplicitlyRestartedLanczos<FermionField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,0,0);
+
+    FermionField src(FrbGrid); 
+    typedef typename FermionField::scalar_type Scalar;
+    src=Scalar(1.0); 
+    src.Checkerboard() = Odd;
+
+    int Nconv;
+    IRL.calc(evals, evecs,src,Nconv,false);
+    if(Nconv < Nstop) assert(0 && "Fine lanczos failed to converge the required number of evecs"); //algorithm doesn't consider this a failure
+    if(Nconv > Nstop){
+      //Yes this potentially throws away some evecs but it is better than having a random number of evecs between Nstop and Nm!
+      evals.resize(Nstop);
+      evecs.resize(Nstop, FrbGrid);
+    }
+    
+    if(args.write_fine){
+      std::string evals_file = args.write_fine_file + "_evals.xml";
+      std::string evecs_file = args.write_fine_file + "_evecs.scidac";
+
+      std::cout << GridLogIRL<< "Writing evecs to "<<evecs_file<<std::endl;
+
+      emptyUserRecord record;
+      Grid::ScidacWriter WR(FrbGrid->IsBoss());
+      WR.open(evecs_file);
+      for(int k=0;k<evecs.size();k++) {
+	WR.writeScidacFieldRecord(evecs[k],record);
+      }
+      WR.close();
+
+      std::cout << GridLogIRL<< "Writing evals to "<<evals_file<<std::endl;
+      
+      XmlWriter WRx(evals_file);
+      write(WRx,"evals",evals);
+    }    
+  }
+    
+  //Do the compression
+  LocalCoherenceCompressor<SiteSpinor,vTComplex,nbasis> compressor;
+  std::vector<FermionField> basis(nbasis,FrbGrid);
+  std::vector<CoarseField> compressed_evecs(evecs.size(),CoarseGrid5);
+  
+  compressor.compress(basis, compressed_evecs, evecs, FrbGrid, CoarseGrid5);
+
+  compareBlockPromoteTimings(basis, compressed_evecs);
+
+  //Compare uncompressed and original evecs
+  compressor.compareEvecs(basis, compressed_evecs, evecs);
+  
+  //Create the smoother
+  Chebyshev<FermionField> smoother(fine.getChebyParams());
+  
+  //Test the quality of the uncompressed evecs
+  assert( compressor.testCompression(SchurOp, smoother, basis, compressed_evecs, evals, fine.stop_rsd, args.coarse_relax_tol) );   
+}
+
+template<typename ActionType>
+void run(ActionType &action, const std::string &config, const Args &args){
+  switch(args.basis_size){
+  case 50:
+    return run_b<50>(action,config,args);
+  case 100:
+    return run_b<100>(action,config,args);
+  case 150:
+    return run_b<150>(action,config,args);
+  case 200:
+    return run_b<200>(action,config,args);
+  case 250:
+    return run_b<250>(action,config,args);
+  case 300:
+    return run_b<300>(action,config,args);
+  case 350:
+    return run_b<350>(action,config,args);
+  case 400:
+    return run_b<400>(action,config,args);
+  default:
+    assert(0 && "Unsupported basis size: allowed values are 50,100,200,250,300,350,400");
+  }
+}
+
+
+
+
+//Note:  because we rely upon physical properties we must use a "real" gauge configuration
+int main (int argc, char ** argv) {
+  Grid_init(&argc,&argv);
+  GridLogIRL.TimingMode(1);
+
+  if(argc < 3){
+    std::cout << GridLogMessage << "Usage: <exe> <config file> <gparity dirs> <options>" << std::endl;
+    std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
+    std::cout << GridLogMessage << "Options:" << std::endl;
+    std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
+    std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
+    std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents  (default 2.2.2.2.2)" << std::endl;
+    std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
+    std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;    
+    std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
+    std::cout << GridLogMessage << "--action : Set the action from 'DWF', 'Mobius'  (default Mobius)" << std::endl;
+    std::cout << GridLogMessage << "--mobius_scale : Set the Mobius scale b+c (default 2)" << std::endl;
+    std::cout << GridLogMessage << "--basis_size : Set the basis size from 50,100,150,200,250,300,350,400 (default 100)" << std::endl;
+
+    Grid_finalize();
+    return 1;
+  }
+  std::string config = argv[1];
+
+  Args args;
+  GridCmdOptionIntVector(argv[2], args.GparityDirs);
+  assert(args.GparityDirs.size() == 3);
+
+  std::string action_s = "Mobius"; 
+  
+  for(int i=3;i<argc;i++){
+    std::string sarg = argv[i];
+    if(sarg == "--Ls"){
+      args.Ls = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set Ls to " << args.Ls << std::endl;
+    }else if(sarg == "--mass"){
+      std::istringstream ss(argv[i+1]); ss >> args.mass;
+      std::cout << GridLogMessage << "Set quark mass to " << args.mass << std::endl;
+    }else if(sarg == "--block"){
+      GridCmdOptionIntVector(argv[i+1], args.blockSize);
+      assert(args.blockSize.size() == 5);
+      std::cout << GridLogMessage << "Set block size to ";
+      for(int q=0;q<5;q++) std::cout << args.blockSize[q] << " ";
+      std::cout << std::endl;      
+    }else if(sarg == "--is_cps_cfg"){
+      args.is_cps_cfg = true;
+    }else if(sarg == "--write_irl_templ"){
+      XmlWriter writer("irl_templ.xml");
+      write(writer,"Params",args.fine);
+      Grid_finalize();
+      return 0;
+    }else if(sarg == "--read_irl_fine"){
+      std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", args.fine);
+    }else if(sarg == "--write_fine"){
+      args.write_fine = true;
+      args.write_fine_file = argv[i+1];
+    }else if(sarg == "--read_fine"){
+      args.read_fine = true;
+      args.read_fine_file = argv[i+1];
+    }else if(sarg == "--coarse_relax_tol"){
+      std::istringstream ss(argv[i+1]); ss >> args.coarse_relax_tol;
+      std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << args.coarse_relax_tol << std::endl;
+    }else if(sarg == "--action"){
+      action_s = argv[i+1];
+      std::cout << "Action set to " << action_s << std::endl;
+    }else if(sarg == "--mobius_scale"){
+      std::istringstream ss(argv[i+1]); ss >> args.mobius_scale;
+      std::cout << GridLogMessage << "Set Mobius scale to " << args.mobius_scale << std::endl;
+    }else if(sarg == "--basis_size"){
+      args.basis_size = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set basis size to " << args.basis_size << std::endl;
+    }
+  }
+  
+  //Fine grids
+  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  GridDefaultSimd(Nd,vComplex::Nsimd()),   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(args.Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(args.Ls,UGrid);
+
+  LatticeGaugeField Umu(UGrid);  
+  
+  bool is_gparity = false;
+  for(auto g : args.GparityDirs) if(g) is_gparity = true;
+
+  double bmc =  1.;      
+  double b = (args.mobius_scale + bmc)/2.;  // b = 1/2 [ (b+c) + (b-c) ]
+  double c = (args.mobius_scale - bmc)/2.;  // c = 1/2 [ (b+c) - (b-c) ]
+    
+  if(is_gparity){
+    GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs);
+    readConfiguration<ConjugateGimplD>(Umu, config, args.is_cps_cfg);   //Read the gauge field
+    
+    if(action_s == "DWF"){    
+      GparityDomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params);
+      run(action, config, args);
+    }else if(action_s == "Mobius"){
+      GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
+      run(action, config, args);	    
+    }      
+  }else{
+    WilsonImplD::ImplParams Params = setupParams();
+    readConfiguration<PeriodicGimplD>(Umu, config, args.is_cps_cfg);   //Read the gauge field
+    
+    if(action_s == "DWF"){    
+      DomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params);
+      run(action, config, args);
+    }else if(action_s == "Mobius"){
+      MobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
+      run(action, config, args);	    
+    }
+  } 
+  
+  Grid_finalize();
+}