diff --git a/Grid/DisableWarnings.h b/Grid/DisableWarnings.h
index 4bd1edd0..a725fc76 100644
--- a/Grid/DisableWarnings.h
+++ b/Grid/DisableWarnings.h
@@ -34,6 +34,9 @@ directory
 
 #if defined __GNUC__ && __GNUC__>=6
 #pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+#if defined __GNUC__ 
+#pragma GCC diagnostic ignored "-Wpsabi"
 #endif
 
  //disables and intel compiler specific warning (in json.hpp)
diff --git a/Grid/GridQCDcore.h b/Grid/GridQCDcore.h
index cae6f43f..065b62cd 100644
--- a/Grid/GridQCDcore.h
+++ b/Grid/GridQCDcore.h
@@ -36,6 +36,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/GridCore.h>
 #include <Grid/qcd/QCD.h>
 #include <Grid/qcd/spin/Spin.h>
+#include <Grid/qcd/gparity/Gparity.h>
 #include <Grid/qcd/utils/Utils.h>
 #include <Grid/qcd/representations/Representations.h>
 NAMESPACE_CHECK(GridQCDCore);
diff --git a/Grid/algorithms/Algorithms.h b/Grid/algorithms/Algorithms.h
index 7f27784b..47a0a92b 100644
--- a/Grid/algorithms/Algorithms.h
+++ b/Grid/algorithms/Algorithms.h
@@ -54,6 +54,7 @@ NAMESPACE_CHECK(BiCGSTAB);
 #include <Grid/algorithms/iterative/SchurRedBlack.h>
 #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
+#include <Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h>
 #include <Grid/algorithms/iterative/BiCGSTABMixedPrec.h>
 #include <Grid/algorithms/iterative/BlockConjugateGradient.h>
 #include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h
index 584ed1d5..a3ddb75f 100644
--- a/Grid/algorithms/approx/Chebyshev.h
+++ b/Grid/algorithms/approx/Chebyshev.h
@@ -292,6 +292,7 @@ public:
 template<class Field>
 class ChebyshevLanczos : public Chebyshev<Field> {
 private:
+
   std::vector<RealD> Coeffs;
   int order;
   RealD alpha;
diff --git a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
index 08942097..8f400625 100644
--- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
+++ b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
@@ -48,6 +48,7 @@ NAMESPACE_BEGIN(Grid);
     Integer TotalInnerIterations; //Number of inner CG iterations
     Integer TotalOuterIterations; //Number of restarts
     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+    RealD TrueResidual;
 
     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
     LinearFunction<FieldF> *guesser;
@@ -79,6 +80,11 @@ NAMESPACE_BEGIN(Grid);
     RealD stop = src_norm * Tolerance*Tolerance;
 
     GridBase* DoublePrecGrid = src_d_in.Grid();
+
+    //Generate precision change workspaces
+    precisionChangeWorkspace wk_dp_from_sp(DoublePrecGrid, SinglePrecGrid);
+    precisionChangeWorkspace wk_sp_from_dp(SinglePrecGrid, DoublePrecGrid);
+
     FieldD tmp_d(DoublePrecGrid);
     tmp_d.Checkerboard() = cb;
     
@@ -119,7 +125,7 @@ NAMESPACE_BEGIN(Grid);
       while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ??
 
       PrecChangeTimer.Start();
-      precisionChange(src_f, src_d);
+      precisionChange(src_f, src_d, wk_sp_from_dp);
       PrecChangeTimer.Stop();
       
       sol_f = Zero();
@@ -137,7 +143,7 @@ NAMESPACE_BEGIN(Grid);
       
       //Convert sol back to double and add to double prec solution
       PrecChangeTimer.Start();
-      precisionChange(tmp_d, sol_f);
+      precisionChange(tmp_d, sol_f, wk_dp_from_sp);
       PrecChangeTimer.Stop();
       
       axpy(sol_d, 1.0, tmp_d, sol_d);
@@ -149,6 +155,7 @@ NAMESPACE_BEGIN(Grid);
     ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
     CG_d(Linop_d, src_d_in, sol_d);
     TotalFinalStepIterations = CG_d.IterationsToComplete;
+    TrueResidual = CG_d.TrueResidual;
 
     TotalTimer.Stop();
     std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
diff --git a/Grid/algorithms/iterative/ConjugateGradientMultiShift.h b/Grid/algorithms/iterative/ConjugateGradientMultiShift.h
index 63a6c55f..3b079e99 100644
--- a/Grid/algorithms/iterative/ConjugateGradientMultiShift.h
+++ b/Grid/algorithms/iterative/ConjugateGradientMultiShift.h
@@ -52,7 +52,7 @@ public:
   MultiShiftFunction shifts;
   std::vector<RealD> TrueResidualShift;
 
-  ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : 
+  ConjugateGradientMultiShift(Integer maxit, const MultiShiftFunction &_shifts) : 
     MaxIterations(maxit),
     shifts(_shifts)
   { 
@@ -182,6 +182,9 @@ public:
     for(int s=0;s<nshift;s++) {
       axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
     }
+
+    std::cout << GridLogIterative << "ConjugateGradientMultiShift: initial rn (|src|^2) =" << rn << " qq (|MdagM src|^2) =" << qq << " d ( dot(src, [MdagM + m_0]src) ) =" << d << " c=" << c << std::endl;
+    
   
   ///////////////////////////////////////
   // Timers
diff --git a/Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
new file mode 100644
index 00000000..e0cad13b
--- /dev/null
+++ b/Grid/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
@@ -0,0 +1,411 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Christopher Kelly <ckelly@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
+#define GRID_CONJUGATE_GRADIENT_MULTI_SHIFT_MIXEDPREC_H
+
+NAMESPACE_BEGIN(Grid);
+
+//CK 2020: A variant of the multi-shift conjugate gradient with the matrix multiplication in single precision. 
+//The residual is stored in single precision, but the search directions and solution are stored in double precision. 
+//Every update_freq iterations the residual is corrected in double precision. 
+    
+//For safety the a final regular CG is applied to clean up if necessary
+
+//Linop to add shift to input linop, used in cleanup CG
+namespace ConjugateGradientMultiShiftMixedPrecSupport{
+template<typename Field>
+class ShiftedLinop: public LinearOperatorBase<Field>{
+public:
+  LinearOperatorBase<Field> &linop_base;
+  RealD shift;
+
+  ShiftedLinop(LinearOperatorBase<Field> &_linop_base, RealD _shift): linop_base(_linop_base), shift(_shift){}
+
+  void OpDiag (const Field &in, Field &out){ assert(0); }
+  void OpDir  (const Field &in, Field &out,int dir,int disp){ assert(0); }
+  void OpDirAll  (const Field &in, std::vector<Field> &out){ assert(0); }
+  
+  void Op     (const Field &in, Field &out){ assert(0); }
+  void AdjOp  (const Field &in, Field &out){ assert(0); }
+
+  void HermOp(const Field &in, Field &out){
+    linop_base.HermOp(in, out);
+    axpy(out, shift, in, out);
+  }    
+
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+    HermOp(in,out);
+    ComplexD dot = innerProduct(in,out);
+    n1=real(dot);
+    n2=norm2(out);
+  }
+};
+};
+
+
+template<class FieldD, class FieldF,
+	 typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
+	 typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
+class ConjugateGradientMultiShiftMixedPrec : public OperatorMultiFunction<FieldD>,
+					     public OperatorFunction<FieldD>
+{
+public:                                                
+
+  using OperatorFunction<FieldD>::operator();
+
+  RealD   Tolerance;
+  Integer MaxIterations;
+  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
+  std::vector<int> IterationsToCompleteShift;  // Iterations for this shift
+  int verbose;
+  MultiShiftFunction shifts;
+  std::vector<RealD> TrueResidualShift;
+
+  int ReliableUpdateFreq; //number of iterations between reliable updates
+
+  GridBase* SinglePrecGrid; //Grid for single-precision fields
+  LinearOperatorBase<FieldF> &Linop_f; //single precision
+
+  ConjugateGradientMultiShiftMixedPrec(Integer maxit, const MultiShiftFunction &_shifts,
+				       GridBase* _SinglePrecGrid, LinearOperatorBase<FieldF> &_Linop_f,
+				       int _ReliableUpdateFreq
+				       ) : 
+    MaxIterations(maxit),  shifts(_shifts), SinglePrecGrid(_SinglePrecGrid), Linop_f(_Linop_f), ReliableUpdateFreq(_ReliableUpdateFreq)
+  { 
+    verbose=1;
+    IterationsToCompleteShift.resize(_shifts.order);
+    TrueResidualShift.resize(_shifts.order);
+  }
+
+  void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, FieldD &psi)
+  {
+    GridBase *grid = src.Grid();
+    int nshift = shifts.order;
+    std::vector<FieldD> results(nshift,grid);
+    (*this)(Linop,src,results,psi);
+  }
+  void operator() (LinearOperatorBase<FieldD> &Linop, const FieldD &src, std::vector<FieldD> &results, FieldD &psi)
+  {
+    int nshift = shifts.order;
+
+    (*this)(Linop,src,results);
+  
+    psi = shifts.norm*src;
+    for(int i=0;i<nshift;i++){
+      psi = psi + shifts.residues[i]*results[i];
+    }
+
+    return;
+  }
+
+  void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
+  { 
+    GridBase *DoublePrecGrid = src_d.Grid();
+    precisionChangeWorkspace wk_f_from_d(SinglePrecGrid, DoublePrecGrid);
+    precisionChangeWorkspace wk_d_from_f(DoublePrecGrid, SinglePrecGrid);
+
+    ////////////////////////////////////////////////////////////////////////
+    // Convenience references to the info stored in "MultiShiftFunction"
+    ////////////////////////////////////////////////////////////////////////
+    int nshift = shifts.order;
+
+    std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
+    std::vector<RealD> &mresidual(shifts.tolerances);
+    std::vector<RealD> alpha(nshift,1.0);
+
+    //Double precision search directions
+    FieldD p_d(DoublePrecGrid);
+    std::vector<FieldD> ps_d(nshift, DoublePrecGrid);// Search directions (double precision)
+
+    FieldD tmp_d(DoublePrecGrid);
+    FieldD r_d(DoublePrecGrid);
+    FieldD mmp_d(DoublePrecGrid);
+
+    assert(psi_d.size()==nshift);
+    assert(mass.size()==nshift);
+    assert(mresidual.size()==nshift);
+  
+    // dynamic sized arrays on stack; 2d is a pain with vector
+    RealD  bs[nshift];
+    RealD  rsq[nshift];
+    RealD  z[nshift][2];
+    int     converged[nshift];
+  
+    const int       primary =0;
+  
+    //Primary shift fields CG iteration
+    RealD a,b,c,d;
+    RealD cp,bp,qq; //prev
+  
+    // Matrix mult fields
+    FieldF r_f(SinglePrecGrid);
+    FieldF p_f(SinglePrecGrid);
+    FieldF tmp_f(SinglePrecGrid);
+    FieldF mmp_f(SinglePrecGrid);
+    FieldF src_f(SinglePrecGrid);
+    precisionChange(src_f, src_d, wk_f_from_d);
+
+    // Check lightest mass
+    for(int s=0;s<nshift;s++){
+      assert( mass[s]>= mass[primary] );
+      converged[s]=0;
+    }
+  
+    // Wire guess to zero
+    // Residuals "r" are src
+    // First search direction "p" is also src
+    cp = norm2(src_d);
+
+    // Handle trivial case of zero src.
+    if( cp == 0. ){
+      for(int s=0;s<nshift;s++){
+	psi_d[s] = Zero();
+	IterationsToCompleteShift[s] = 1;
+	TrueResidualShift[s] = 0.;
+      }
+      return;
+    }
+
+    for(int s=0;s<nshift;s++){
+      rsq[s] = cp * mresidual[s] * mresidual[s];
+      std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
+      ps_d[s] = src_d;
+    }
+    // r and p for primary
+    r_f=src_f; //residual maintained in single
+    p_f=src_f;
+    p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
+  
+    //MdagM+m[0]
+    Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p        d=real(dot(p, mmp)),  qq=norm2(mmp)
+    axpy(mmp_f,mass[0],p_f,mmp_f);
+    RealD rn = norm2(p_f);
+    d += rn*mass[0];
+
+    b = -cp /d;
+  
+    // Set up the various shift variables
+    int       iz=0;
+    z[0][1-iz] = 1.0;
+    z[0][iz]   = 1.0;
+    bs[0]      = b;
+    for(int s=1;s<nshift;s++){
+      z[s][1-iz] = 1.0;
+      z[s][iz]   = 1.0/( 1.0 - b*(mass[s]-mass[0]));
+      bs[s]      = b*z[s][iz]; 
+    }
+  
+    // r += b[0] A.p[0]
+    // c= norm(r)
+    c=axpy_norm(r_f,b,mmp_f,r_f);
+  
+    for(int s=0;s<nshift;s++) {
+      axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
+    }
+  
+    ///////////////////////////////////////
+    // Timers
+    ///////////////////////////////////////
+    GridStopWatch AXPYTimer, ShiftTimer, QRTimer, MatrixTimer, SolverTimer, PrecChangeTimer, CleanupTimer;
+
+    SolverTimer.Start();
+  
+    // Iteration loop
+    int k;
+  
+    for (k=1;k<=MaxIterations;k++){    
+      a = c /cp;
+
+      //Update double precision search direction by residual
+      PrecChangeTimer.Start();
+      precisionChange(r_d, r_f, wk_d_from_f);
+      PrecChangeTimer.Stop();
+
+      AXPYTimer.Start();
+      axpy(p_d,a,p_d,r_d); 
+
+      for(int s=0;s<nshift;s++){
+	if ( ! converged[s] ) { 
+	  if (s==0){
+	    axpy(ps_d[s],a,ps_d[s],r_d);
+	  } else{
+	    RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
+	    axpby(ps_d[s],z[s][iz],as,r_d,ps_d[s]);
+	  }
+	}
+      }
+      AXPYTimer.Stop();
+
+      PrecChangeTimer.Start();
+      precisionChange(p_f, p_d, wk_f_from_d); //get back single prec search direction for linop
+      PrecChangeTimer.Stop();
+
+      cp=c;
+      MatrixTimer.Start();  
+      Linop_f.HermOp(p_f,mmp_f); 
+      d=real(innerProduct(p_f,mmp_f));    
+      MatrixTimer.Stop();  
+
+      AXPYTimer.Start();
+      axpy(mmp_f,mass[0],p_f,mmp_f);
+      AXPYTimer.Stop();
+      RealD rn = norm2(p_f);
+      d += rn*mass[0];
+    
+      bp=b;
+      b=-cp/d;
+    
+      // Toggle the recurrence history
+      bs[0] = b;
+      iz = 1-iz;
+      ShiftTimer.Start();
+      for(int s=1;s<nshift;s++){
+	if((!converged[s])){
+	  RealD z0 = z[s][1-iz];
+	  RealD z1 = z[s][iz];
+	  z[s][iz] = z0*z1*bp
+	    / (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b)); 
+	  bs[s] = b*z[s][iz]/z0; // NB sign  rel to Mike
+	}
+      }
+      ShiftTimer.Stop();
+
+      //Update double precision solutions
+      AXPYTimer.Start();
+      for(int s=0;s<nshift;s++){
+	int ss = s;
+	if( (!converged[s]) ) { 
+	  axpy(psi_d[ss],-bs[s]*alpha[s],ps_d[s],psi_d[ss]);
+	}
+      }
+
+      //Perform reliable update if necessary; otherwise update residual from single-prec mmp
+      RealD c_f = axpy_norm(r_f,b,mmp_f,r_f);
+      AXPYTimer.Stop();
+
+      c = c_f;
+
+      if(k % ReliableUpdateFreq == 0){
+	//Replace r with true residual
+	MatrixTimer.Start();  
+	Linop_d.HermOp(psi_d[0],mmp_d); 
+	MatrixTimer.Stop();  
+
+	AXPYTimer.Start();
+	axpy(mmp_d,mass[0],psi_d[0],mmp_d);
+
+	RealD c_d = axpy_norm(r_d, -1.0, mmp_d, src_d);
+	AXPYTimer.Stop();
+
+	std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<< ", replaced |r|^2 = "<<c_f <<" with |r|^2 = "<<c_d<<std::endl;
+	
+	PrecChangeTimer.Start();
+	precisionChange(r_f, r_d, wk_f_from_d);
+	PrecChangeTimer.Stop();
+	c = c_d;
+      }
+    
+      // Convergence checks
+      int all_converged = 1;
+      for(int s=0;s<nshift;s++){
+      
+	if ( (!converged[s]) ){
+	  IterationsToCompleteShift[s] = k;
+	
+	  RealD css  = c * z[s][iz]* z[s][iz];
+	
+	  if(css<rsq[s]){
+	    if ( ! converged[s] )
+	      std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
+	    converged[s]=1;
+	  } else {
+	    all_converged=0;
+	  }
+
+	}
+      }
+
+      if ( all_converged ){
+
+	SolverTimer.Stop();
+	std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: All shifts have converged iteration "<<k<<std::endl;
+	std::cout<<GridLogMessage<< "ConjugateGradientMultiShiftMixedPrec: Checking solutions"<<std::endl;
+      
+	// Check answers 
+	for(int s=0; s < nshift; s++) { 
+	  Linop_d.HermOpAndNorm(psi_d[s],mmp_d,d,qq);
+	  axpy(tmp_d,mass[s],psi_d[s],mmp_d);
+	  axpy(r_d,-alpha[s],src_d,tmp_d);
+	  RealD rn = norm2(r_d);
+	  RealD cn = norm2(src_d);
+	  TrueResidualShift[s] = std::sqrt(rn/cn);
+	  std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: shift["<<s<<"] true residual "<< TrueResidualShift[s] << " target " << mresidual[s] << std::endl;
+
+	  //If we have not reached the desired tolerance, do a (mixed precision) CG cleanup
+	  if(rn >= rsq[s]){
+	    CleanupTimer.Start();
+	    std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrec: performing cleanup step for shift " << s << std::endl;
+
+	    //Setup linear operators for final cleanup
+	    ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldD> Linop_shift_d(Linop_d, mass[s]);
+	    ConjugateGradientMultiShiftMixedPrecSupport::ShiftedLinop<FieldF> Linop_shift_f(Linop_f, mass[s]);
+					       
+	    MixedPrecisionConjugateGradient<FieldD,FieldF> cg(mresidual[s], MaxIterations, MaxIterations, SinglePrecGrid, Linop_shift_f, Linop_shift_d); 
+	    cg(src_d, psi_d[s]);
+	    
+	    TrueResidualShift[s] = cg.TrueResidual;
+	    CleanupTimer.Stop();
+	  }
+	}
+
+	std::cout << GridLogMessage << "ConjugateGradientMultiShiftMixedPrec: Time Breakdown for body"<<std::endl;
+	std::cout << GridLogMessage << "\tSolver    " << SolverTimer.Elapsed()     <<std::endl;
+	std::cout << GridLogMessage << "\t\tAXPY    " << AXPYTimer.Elapsed()     <<std::endl;
+	std::cout << GridLogMessage << "\t\tMatrix    " << MatrixTimer.Elapsed()     <<std::endl;
+	std::cout << GridLogMessage << "\t\tShift    " << ShiftTimer.Elapsed()     <<std::endl;
+	std::cout << GridLogMessage << "\t\tPrecision Change " << PrecChangeTimer.Elapsed()     <<std::endl;
+	std::cout << GridLogMessage << "\tFinal Cleanup " << CleanupTimer.Elapsed()     <<std::endl;
+	std::cout << GridLogMessage << "\tSolver+Cleanup " << SolverTimer.Elapsed() + CleanupTimer.Elapsed() << std::endl;
+
+	IterationsToComplete = k;	
+
+	return;
+      }
+
+   
+    }
+    // ugly hack
+    std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
+    //  assert(0);
+  }
+
+};
+NAMESPACE_END(Grid);
+#endif
diff --git a/Grid/lattice/Lattice.h b/Grid/lattice/Lattice.h
index 9f5f1da7..c4adf86a 100644
--- a/Grid/lattice/Lattice.h
+++ b/Grid/lattice/Lattice.h
@@ -46,3 +46,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/lattice/Lattice_unary.h>
 #include <Grid/lattice/Lattice_transfer.h>
 #include <Grid/lattice/Lattice_basis.h>
+#include <Grid/lattice/Lattice_crc.h>
diff --git a/Grid/lattice/Lattice_crc.h b/Grid/lattice/Lattice_crc.h
new file mode 100644
index 00000000..a283ea8b
--- /dev/null
+++ b/Grid/lattice/Lattice_crc.h
@@ -0,0 +1,42 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/lattice/Lattice_crc.h
+
+    Copyright (C) 2021
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#pragma once
+
+NAMESPACE_BEGIN(Grid);
+
+template<class vobj> uint32_t crc(Lattice<vobj> & buf)
+{
+  autoView( buf_v , buf, CpuRead);
+  return ::crc32(0L,(unsigned char *)&buf_v[0],(size_t)sizeof(vobj)*buf.oSites());
+}
+
+#define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
+
+NAMESPACE_END(Grid);
+
+
diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h
index e5e63716..5b66ff34 100644
--- a/Grid/lattice/Lattice_rng.h
+++ b/Grid/lattice/Lattice_rng.h
@@ -32,8 +32,9 @@
 #include <random>
 
 #ifdef RNG_SITMO
-#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
+#include <Grid/random/sitmo_prng_engine.hpp>
 #endif 
+#include <Grid/random/gaussian.h>
 
 #if defined(RNG_SITMO)
 #define RNG_FAST_DISCARD
@@ -142,8 +143,8 @@ public:
 
   std::vector<RngEngine>                             _generators;
   std::vector<std::uniform_real_distribution<RealD> > _uniform;
-  std::vector<std::normal_distribution<RealD> >       _gaussian;
-  std::vector<std::discrete_distribution<int32_t> >   _bernoulli;
+  std::vector<Grid::gaussian_distribution<RealD> >       _gaussian;
+  //  std::vector<std::discrete_distribution<int32_t> >   _bernoulli;
   std::vector<std::uniform_int_distribution<uint32_t> > _uid;
 
   ///////////////////////
@@ -243,8 +244,8 @@ public:
   GridSerialRNG() : GridRNGbase() {
     _generators.resize(1);
     _uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
-    _gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
-    _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
+    _gaussian.resize(1,gaussian_distribution<RealD>(0.0,1.0) );
+    //    _bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
     _uid.resize(1,std::uniform_int_distribution<uint32_t>() );
   }
 
@@ -357,8 +358,8 @@ public:
 
     _generators.resize(_vol);
     _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
-    _gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
-    _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
+    _gaussian.resize(_vol,gaussian_distribution<RealD>(0.0,1.0) );
+    //    _bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
     _uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
   }
 
@@ -515,11 +516,11 @@ public:
 
 template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l)   { rng.fill(l,rng._uniform);  }
 template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
-template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
+//template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
 
 template <class sobj> inline void random(GridSerialRNG &rng,sobj &l)   { rng.fill(l,rng._uniform  ); }
 template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
-template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
+//template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
 
 NAMESPACE_END(Grid);
 #endif
diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h
index 5a26cce9..7adf09d7 100644
--- a/Grid/lattice/Lattice_transfer.h
+++ b/Grid/lattice/Lattice_transfer.h
@@ -777,7 +777,7 @@ void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int
 
 
 template<class vobj>
-void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
+void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
 {
   typedef typename vobj::scalar_object sobj;
 
@@ -1002,54 +1002,96 @@ vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
   });
 }
 
-//Convert a Lattice from one precision to another
-template<class VobjOut, class VobjIn>
-void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
-{
-  assert(out.Grid()->Nd() == in.Grid()->Nd());
-  for(int d=0;d<out.Grid()->Nd();d++){
-    assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]);
-  }
-  out.Checkerboard() = in.Checkerboard();
-  GridBase *in_grid=in.Grid();
-  GridBase *out_grid = out.Grid();
-
-  typedef typename VobjOut::scalar_object SobjOut;
-  typedef typename VobjIn::scalar_object SobjIn;
-
-  int ndim = out.Grid()->Nd();
-  int out_nsimd = out_grid->Nsimd();
-    
-  std::vector<Coordinate > out_icoor(out_nsimd);
-      
-  for(int lane=0; lane < out_nsimd; lane++){
-    out_icoor[lane].resize(ndim);
-    out_grid->iCoorFromIindex(out_icoor[lane], lane);
-  }
-        
-  std::vector<SobjOut> in_slex_conv(in_grid->lSites());
-  unvectorizeToLexOrdArray(in_slex_conv, in);
-    
-  autoView( out_v , out, CpuWrite);
-  thread_for(out_oidx,out_grid->oSites(),{
-    Coordinate out_ocoor(ndim);
-    out_grid->oCoorFromOindex(out_ocoor, out_oidx);
-
-    ExtractPointerArray<SobjOut> ptrs(out_nsimd);      
-
-    Coordinate lcoor(out_grid->Nd());
-      
-    for(int lane=0; lane < out_nsimd; lane++){
-      for(int mu=0;mu<ndim;mu++)
-	lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu];
-	
-      int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
-      ptrs[lane] = &in_slex_conv[llex];
+//The workspace for a precision change operation allowing for the reuse of the mapping to save time on subsequent calls
+class precisionChangeWorkspace{
+  std::pair<Integer,Integer>* fmap_device; //device pointer
+public:
+  precisionChangeWorkspace(GridBase *out_grid, GridBase *in_grid){
+    //Build a map between the sites and lanes of the output field and the input field as we cannot use the Grids on the device
+    assert(out_grid->Nd() == in_grid->Nd());
+    for(int d=0;d<out_grid->Nd();d++){
+      assert(out_grid->FullDimensions()[d] == in_grid->FullDimensions()[d]);
     }
-    merge(out_v[out_oidx], ptrs, 0);
-  });
+    int Nsimd_out = out_grid->Nsimd();
+
+    std::vector<Coordinate> out_icorrs(out_grid->Nsimd()); //reuse these
+    for(int lane=0; lane < out_grid->Nsimd(); lane++)
+      out_grid->iCoorFromIindex(out_icorrs[lane], lane);
+  
+    std::vector<std::pair<Integer,Integer> > fmap_host(out_grid->lSites()); //lsites = osites*Nsimd
+    thread_for(out_oidx,out_grid->oSites(),{
+	Coordinate out_ocorr; 
+	out_grid->oCoorFromOindex(out_ocorr, out_oidx);
+      
+	Coordinate lcorr; //the local coordinate (common to both in and out as full coordinate)
+	for(int out_lane=0; out_lane < Nsimd_out; out_lane++){
+	  out_grid->InOutCoorToLocalCoor(out_ocorr, out_icorrs[out_lane], lcorr);
+	
+	  //int in_oidx = in_grid->oIndex(lcorr), in_lane = in_grid->iIndex(lcorr);
+	  //Note oIndex and OcorrFromOindex (and same for iIndex) are not inverse for checkerboarded lattice, the former coordinates being defined on the full lattice and the latter on the reduced lattice
+	  //Until this is fixed we need to circumvent the problem locally. Here I will use the coordinates defined on the reduced lattice for simplicity
+	  int in_oidx = 0, in_lane = 0;
+	  for(int d=0;d<in_grid->_ndimension;d++){
+	    in_oidx += in_grid->_ostride[d] * ( lcorr[d] % in_grid->_rdimensions[d] );
+	    in_lane += in_grid->_istride[d] * ( lcorr[d] / in_grid->_rdimensions[d] );
+	  }
+	  fmap_host[out_lane + Nsimd_out*out_oidx] = std::pair<Integer,Integer>( in_oidx, in_lane );
+	}
+      });
+
+    //Copy the map to the device (if we had a way to tell if an accelerator is in use we could avoid this copy for CPU-only machines)
+    size_t fmap_bytes = out_grid->lSites() * sizeof(std::pair<Integer,Integer>);
+    fmap_device = (std::pair<Integer,Integer>*)acceleratorAllocDevice(fmap_bytes);
+    acceleratorCopyToDevice(fmap_host.data(), fmap_device, fmap_bytes); 
+  }
+
+  //Prevent moving or copying
+  precisionChangeWorkspace(const precisionChangeWorkspace &r) = delete;
+  precisionChangeWorkspace(precisionChangeWorkspace &&r) = delete;
+  precisionChangeWorkspace &operator=(const precisionChangeWorkspace &r) = delete;
+  precisionChangeWorkspace &operator=(precisionChangeWorkspace &&r) = delete;
+  
+  std::pair<Integer,Integer> const* getMap() const{ return fmap_device; }
+
+  ~precisionChangeWorkspace(){
+    acceleratorFreeDevice(fmap_device);
+  }
+};
+
+
+//Convert a lattice of one precision to another. The input workspace contains the mapping data.
+template<class VobjOut, class VobjIn>
+void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in, const precisionChangeWorkspace &workspace){
+  static_assert( std::is_same<typename VobjOut::DoublePrecision, typename VobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
+
+  out.Checkerboard() = in.Checkerboard();
+  constexpr int Nsimd_out = VobjOut::Nsimd();
+
+  std::pair<Integer,Integer> const* fmap_device = workspace.getMap();
+
+  //Do the copy/precision change
+  autoView( out_v , out, AcceleratorWrite);
+  autoView( in_v , in, AcceleratorRead);
+
+  accelerator_for(out_oidx, out.Grid()->oSites(), 1,{
+      std::pair<Integer,Integer> const* fmap_osite = fmap_device + out_oidx*Nsimd_out;
+      for(int out_lane=0; out_lane < Nsimd_out; out_lane++){      
+	int in_oidx = fmap_osite[out_lane].first;
+	int in_lane = fmap_osite[out_lane].second;
+	copyLane(out_v[out_oidx], out_lane, in_v[in_oidx], in_lane);
+      }
+    });
 }
 
+//Convert a Lattice from one precision to another
+//Generate the workspace in place; if multiple calls with the same mapping are performed, consider pregenerating the workspace and reusing
+template<class VobjOut, class VobjIn>
+void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
+  precisionChangeWorkspace workspace(out.Grid(), in.Grid());
+  precisionChange(out, in, workspace);
+}
+
+
 ////////////////////////////////////////////////////////////////////////////////
 // Communicate between grids
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/Grid/log/Log.cc b/Grid/log/Log.cc
index 9302b4cc..31f9a3f3 100644
--- a/Grid/log/Log.cc
+++ b/Grid/log/Log.cc
@@ -69,6 +69,7 @@ GridLogger GridLogDebug  (1, "Debug", GridLogColours, "PURPLE");
 GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
 GridLogger GridLogIterative  (1, "Iterative", GridLogColours, "BLUE");
 GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
+GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
 
 void GridLogConfigure(std::vector<std::string> &logstreams) {
   GridLogError.Active(0);
@@ -79,6 +80,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
   GridLogPerformance.Active(0);
   GridLogIntegrator.Active(1);
   GridLogColours.Active(0);
+  GridLogHMC.Active(1);
 
   for (int i = 0; i < logstreams.size(); i++) {
     if (logstreams[i] == std::string("Error"))       GridLogError.Active(1);
@@ -87,7 +89,8 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
     if (logstreams[i] == std::string("Iterative"))   GridLogIterative.Active(1);
     if (logstreams[i] == std::string("Debug"))       GridLogDebug.Active(1);
     if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
-    if (logstreams[i] == std::string("Integrator"))  GridLogIntegrator.Active(1);
+    if (logstreams[i] == std::string("NoIntegrator"))  GridLogIntegrator.Active(0);
+    if (logstreams[i] == std::string("NoHMC"))         GridLogHMC.Active(0);
     if (logstreams[i] == std::string("Colours"))     GridLogColours.Active(1);
   }
 }
diff --git a/Grid/log/Log.h b/Grid/log/Log.h
index 68693647..b1696fee 100644
--- a/Grid/log/Log.h
+++ b/Grid/log/Log.h
@@ -182,6 +182,7 @@ extern GridLogger GridLogDebug  ;
 extern GridLogger GridLogPerformance;
 extern GridLogger GridLogIterative  ;
 extern GridLogger GridLogIntegrator  ;
+extern GridLogger GridLogHMC;
 extern Colours    GridLogColours;
 
 std::string demangle(const char* name) ;
diff --git a/Grid/parallelIO/NerscIO.h b/Grid/parallelIO/NerscIO.h
index 99011e25..88278131 100644
--- a/Grid/parallelIO/NerscIO.h
+++ b/Grid/parallelIO/NerscIO.h
@@ -39,9 +39,11 @@ using namespace Grid;
 ////////////////////////////////////////////////////////////////////////////////
 class NerscIO : public BinaryIO { 
 public:
-
   typedef Lattice<vLorentzColourMatrixD> GaugeField;
 
+  // Enable/disable exiting if the plaquette in the header does not match the value computed (default true)
+  static bool & exitOnReadPlaquetteMismatch(){ static bool v=true; return v; }
+
   static inline void truncate(std::string file){
     std::ofstream fout(file,std::ios::out);
   }
@@ -198,7 +200,7 @@ public:
       std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
       exit(0);
     }
-    assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
+    if(exitOnReadPlaquetteMismatch()) assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
     assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
     assert(nersc_csum == header.checksum );
       
diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h
index 858aead7..81356a66 100644
--- a/Grid/qcd/QCD.h
+++ b/Grid/qcd/QCD.h
@@ -63,6 +63,7 @@ static constexpr int Ngp=2; // gparity index range
 #define ColourIndex  (2)
 #define SpinIndex    (1)
 #define LorentzIndex (0)
+#define GparityFlavourIndex (0)
 
 // Also should make these a named enum type
 static constexpr int DaggerNo=0;
@@ -87,6 +88,8 @@ template<typename T> struct isCoarsened {
 template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
 template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
 
+const int GparityFlavourTensorIndex = 3; //TensorLevel counts from the bottom!
+
 // ChrisK very keen to add extra space for Gparity doubling.
 //
 // Also add domain wall index, in a way where Wilson operator 
@@ -110,8 +113,10 @@ template<typename vtype> using iHalfSpinColourVector      = iScalar<iVector<iVec
     template<typename vtype> using iSpinColourSpinColourMatrix  = iScalar<iMatrix<iMatrix<iMatrix<iMatrix<vtype, Nc>, Ns>, Nc>, Ns> >;
 
 
+template<typename vtype> using iGparityFlavourVector                = iVector<iScalar<iScalar<vtype> >, Ngp>;
 template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
 template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
+template<typename vtype> using iGparityFlavourMatrix = iMatrix<iScalar<iScalar<vtype> >, Ngp>;
 
 // Spin matrix
 typedef iSpinMatrix<Complex  >          SpinMatrix;
@@ -176,6 +181,16 @@ typedef iDoubleStoredColourMatrix<vComplex > vDoubleStoredColourMatrix;
 typedef iDoubleStoredColourMatrix<vComplexF> vDoubleStoredColourMatrixF;
 typedef iDoubleStoredColourMatrix<vComplexD> vDoubleStoredColourMatrixD;
 
+//G-parity flavour matrix
+typedef iGparityFlavourMatrix<Complex> GparityFlavourMatrix;
+typedef iGparityFlavourMatrix<ComplexF> GparityFlavourMatrixF;
+typedef iGparityFlavourMatrix<ComplexD> GparityFlavourMatrixD;
+
+typedef iGparityFlavourMatrix<vComplex> vGparityFlavourMatrix;
+typedef iGparityFlavourMatrix<vComplexF> vGparityFlavourMatrixF;
+typedef iGparityFlavourMatrix<vComplexD> vGparityFlavourMatrixD;
+
+
 // Spin vector
 typedef iSpinVector<Complex >           SpinVector;
 typedef iSpinVector<ComplexF>           SpinVectorF;
@@ -220,6 +235,16 @@ typedef iHalfSpinColourVector<ComplexD> HalfSpinColourVectorD;
 typedef iHalfSpinColourVector<vComplex > vHalfSpinColourVector;
 typedef iHalfSpinColourVector<vComplexF> vHalfSpinColourVectorF;
 typedef iHalfSpinColourVector<vComplexD> vHalfSpinColourVectorD;
+
+//G-parity flavour vector
+typedef iGparityFlavourVector<Complex >         GparityFlavourVector;
+typedef iGparityFlavourVector<ComplexF>         GparityFlavourVectorF;
+typedef iGparityFlavourVector<ComplexD>         GparityFlavourVectorD;
+
+typedef iGparityFlavourVector<vComplex >         vGparityFlavourVector;
+typedef iGparityFlavourVector<vComplexF>         vGparityFlavourVectorF;
+typedef iGparityFlavourVector<vComplexD>         vGparityFlavourVectorD;
+
     
 // singlets
 typedef iSinglet<Complex >         TComplex;     // FIXME This is painful. Tensor singlet complex type.
diff --git a/Grid/qcd/action/ActionParams.h b/Grid/qcd/action/ActionParams.h
index 0e6a11c6..12d2b91b 100644
--- a/Grid/qcd/action/ActionParams.h
+++ b/Grid/qcd/action/ActionParams.h
@@ -36,7 +36,8 @@ NAMESPACE_BEGIN(Grid);
 
 // These can move into a params header and be given MacroMagic serialisation
 struct GparityWilsonImplParams {
-  Coordinate twists;
+  Coordinate twists; //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction. 
+                     //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
   GparityWilsonImplParams() : twists(Nd, 0) {};
 };
   
@@ -85,6 +86,50 @@ struct StaggeredImplParams {
         precision(_precision),
         BoundsCheckFreq(_BoundsCheckFreq){};
   };
+
+
+  /*Action parameters for the generalized rational action
+    The approximation is for (M^dag M)^{1/inv_pow}
+    where inv_pow is the denominator of the fractional power.
+    Default inv_pow=2 for square root, making this equivalent to 
+    the OneFlavourRational action
+  */
+    struct RationalActionParams : Serializable {
+    GRID_SERIALIZABLE_CLASS_MEMBERS(RationalActionParams, 
+				    int, inv_pow, 
+				    RealD, lo, //low eigenvalue bound of rational approx
+				    RealD, hi, //high eigenvalue bound of rational approx
+				    int,   MaxIter,  //maximum iterations in msCG
+				    RealD, action_tolerance,  //msCG tolerance in action evaluation
+				    int,   action_degree, //rational approx tolerance in action evaluation
+				    RealD, md_tolerance,  //msCG tolerance in MD integration
+				    int,   md_degree, //rational approx tolerance in MD integration
+				    int,   precision, //precision of floating point arithmetic
+				    int,   BoundsCheckFreq); //frequency the approximation is tested (with Metropolis degree/tolerance); 0 disables the check
+  // constructor 
+  RationalActionParams(int _inv_pow = 2,
+		       RealD _lo      = 0.0, 
+		       RealD _hi      = 1.0, 
+		       int _maxit     = 1000,
+		       RealD _action_tolerance      = 1.0e-8, 
+		       int _action_degree    = 10,
+		       RealD _md_tolerance      = 1.0e-8, 
+		       int _md_degree    = 10,
+		       int _precision = 64,
+		       int _BoundsCheckFreq=20)
+    : inv_pow(_inv_pow), 
+      lo(_lo),
+      hi(_hi),
+      MaxIter(_maxit),
+      action_tolerance(_action_tolerance),
+      action_degree(_action_degree),
+      md_tolerance(_md_tolerance),
+      md_degree(_md_degree),
+      precision(_precision),
+      BoundsCheckFreq(_BoundsCheckFreq){};
+  };
+
+
   
 NAMESPACE_END(Grid);
 
diff --git a/Grid/qcd/action/fermion/GparityWilsonImpl.h b/Grid/qcd/action/fermion/GparityWilsonImpl.h
index 9dca403b..5b0a67d3 100644
--- a/Grid/qcd/action/fermion/GparityWilsonImpl.h
+++ b/Grid/qcd/action/fermion/GparityWilsonImpl.h
@@ -30,6 +30,18 @@ directory
 
 NAMESPACE_BEGIN(Grid);
 
+/*
+  Policy implementation for G-parity boundary conditions
+
+  Rather than treating the gauge field as a flavored field, the Grid implementation of G-parity treats the gauge field as a regular
+  field with complex conjugate boundary conditions. In order to ensure the second flavor interacts with the conjugate links and the first
+  with the regular links we overload the functionality of doubleStore, whose purpose is to store the gauge field and the barrel-shifted gauge field
+  to avoid communicating links when applying the Dirac operator, such that the double-stored field contains also a flavor index which maps to
+  either the link or the conjugate link. This flavored field is then used by multLink to apply the correct link to a spinor.
+
+  Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction. 
+  mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
+ */
 template <class S, class Representation = FundamentalRepresentation, class Options=CoeffReal>
 class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Representation::Dimension> > {
 public:
@@ -113,7 +125,7 @@ public:
     || ((distance== 1)&&(icoor[direction]==1))
     || ((distance==-1)&&(icoor[direction]==0));
 
-    permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu]; //only if we are going around the world
+    permute_lane = permute_lane && SE->_around_the_world && St.parameters.twists[mmu] && mmu < Nd-1; //only if we are going around the world in a spatial direction
 
     //Apply the links
     int f_upper = permute_lane ? 1 : 0;
@@ -139,10 +151,10 @@ public:
     assert((distance == 1) || (distance == -1));  // nearest neighbour stencil hard code
     assert((sl == 1) || (sl == 2));
 
-    if ( SE->_around_the_world && St.parameters.twists[mmu] ) {
-
+    //If this site is an global boundary site, perform the G-parity flavor twist
+    if ( mmu < Nd-1 && SE->_around_the_world && St.parameters.twists[mmu] ) {
       if ( sl == 2 ) {
-       
+	//Only do the twist for lanes on the edge of the physical node
 	ExtractBuffer<sobj> vals(Nsimd);
 
 	extract(chi,vals);
@@ -197,6 +209,19 @@ public:
     reg = memory;
   }
 
+
+  //Poke 'poke_f0' onto flavor 0 and 'poke_f1' onto flavor 1 in direction mu of the doubled gauge field Uds
+  inline void pokeGparityDoubledGaugeField(DoubledGaugeField &Uds, const GaugeLinkField &poke_f0, const GaugeLinkField &poke_f1, const int mu){
+    autoView(poke_f0_v, poke_f0, CpuRead);
+    autoView(poke_f1_v, poke_f1, CpuRead);
+    autoView(Uds_v, Uds, CpuWrite);
+    thread_foreach(ss,poke_f0_v,{
+	Uds_v[ss](0)(mu) = poke_f0_v[ss]();
+	Uds_v[ss](1)(mu) = poke_f1_v[ss]();
+      });
+  }
+    
+
   inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
   {
     conformable(Uds.Grid(),GaugeGrid);
@@ -207,14 +232,19 @@ public:
     GaugeLinkField Uconj(GaugeGrid);
    
     Lattice<iScalar<vInteger> > coor(GaugeGrid);
-        
-    for(int mu=0;mu<Nd;mu++){
-          
-      LatticeCoordinate(coor,mu);
+
+    //Here the first Nd-1 directions are treated as "spatial", and a twist value of 1 indicates G-parity BCs in that direction. 
+    //mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs        
+    for(int mu=0;mu<Nd-1;mu++){
+
+      if( Params.twists[mu] ){
+	LatticeCoordinate(coor,mu);
+      }
           
       U     = PeekIndex<LorentzIndex>(Umu,mu);
       Uconj = conjugate(U);
      
+      // Implement the isospin rotation sign on the boundary between f=1 and f=0
       // This phase could come from a simple bc 1,1,-1,1 ..
       int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
       if ( Params.twists[mu] ) { 
@@ -229,7 +259,7 @@ public:
 	thread_foreach(ss,U_v,{
 	    Uds_v[ss](0)(mu) = U_v[ss]();
 	    Uds_v[ss](1)(mu) = Uconj_v[ss]();
-	  });
+	});
       }
           
       U     = adj(Cshift(U    ,mu,-1));      // correct except for spanning the boundary
@@ -260,6 +290,38 @@ public:
         });
       }
     }
+
+    { //periodic / antiperiodic temporal BCs
+      int mu = Nd-1;
+      int L   = GaugeGrid->GlobalDimensions()[mu];
+      int Lmu = L - 1;
+
+      LatticeCoordinate(coor, mu);
+
+      U = PeekIndex<LorentzIndex>(Umu, mu); //Get t-directed links
+      
+      GaugeLinkField *Upoke = &U;
+
+      if(Params.twists[mu]){ //antiperiodic
+	Utmp =  where(coor == Lmu, -U, U);
+	Upoke = &Utmp;
+      }
+    
+      Uconj = conjugate(*Upoke); //second flavor interacts with conjugate links      
+      pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu);
+
+      //Get the barrel-shifted field
+      Utmp = adj(Cshift(U, mu, -1)); //is a forward shift!
+      Upoke = &Utmp;
+
+      if(Params.twists[mu]){
+	U = where(coor == 0, -Utmp, Utmp);  //boundary phase
+	Upoke = &U;
+      }
+      
+      Uconj = conjugate(*Upoke);
+      pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
+    }
   }
       
   inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
@@ -298,28 +360,48 @@ public:
   inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){
     assert(0);
   }
-  
+ 
   inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
-
-    int Ls = Btilde.Grid()->_fdimensions[0];
-        
-    GaugeLinkField tmp(mat.Grid());
-    tmp = Zero();
+    int Ls=Btilde.Grid()->_fdimensions[0];
+    
     {
-      autoView( tmp_v , tmp, CpuWrite);
-      autoView( Atilde_v , Atilde, CpuRead);
-      autoView( Btilde_v , Btilde, CpuRead);
-      thread_for(ss,tmp.Grid()->oSites(),{
-	  for (int s = 0; s < Ls; s++) {
-	    int sF = s + Ls * ss;
-	    auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
-	    tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
-	  }
-	});
+      GridBase *GaugeGrid = mat.Grid();
+      Lattice<iScalar<vInteger> > coor(GaugeGrid);
+
+      if( Params.twists[mu] ){
+	LatticeCoordinate(coor,mu);
+      }
+
+      autoView( mat_v , mat, AcceleratorWrite);
+      autoView( Btilde_v , Btilde, AcceleratorRead);
+      autoView( Atilde_v , Atilde, AcceleratorRead);
+      accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{	  
+  	  int sU=sss;
+  	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
+  	  ColorMatrixType sum;
+  	  zeroit(sum);
+  	  for(int s=0;s<Ls;s++){
+  	    int sF = s+Ls*sU;
+  	    for(int spn=0;spn<Ns;spn++){ //sum over spin
+	      //Flavor 0
+  	      auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
+  	      auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
+  	      sum = sum + outerProduct(bb,aa);
+
+  	      //Flavor 1
+  	      bb = coalescedRead(Btilde_v[sF](1)(spn) );
+  	      aa = coalescedRead(Atilde_v[sF](1)(spn) );
+  	      sum = sum + conjugate(outerProduct(bb,aa));
+  	    }
+  	  }	    
+  	  coalescedWrite(mat_v[sU](mu)(), sum);
+  	});
     }
-    PokeIndex<LorentzIndex>(mat, tmp, mu);
-    return;
   }
+
+
+  
+
   
 };
 
diff --git a/Grid/qcd/action/pseudofermion/Bounds.h b/Grid/qcd/action/pseudofermion/Bounds.h
index 535e1a49..47338db7 100644
--- a/Grid/qcd/action/pseudofermion/Bounds.h
+++ b/Grid/qcd/action/pseudofermion/Bounds.h
@@ -40,13 +40,66 @@ NAMESPACE_BEGIN(Grid);
       X=X-Y;
       RealD Nd = norm2(X);
       std::cout << "************************* "<<std::endl;
-      std::cout << " noise                         = "<<Nx<<std::endl;
-      std::cout << " (MdagM^-1/2)^2  noise         = "<<Nz<<std::endl;
-      std::cout << " MdagM (MdagM^-1/2)^2  noise   = "<<Ny<<std::endl;
-      std::cout << " noise - MdagM (MdagM^-1/2)^2  noise   = "<<Nd<<std::endl;
+      std::cout << " | noise |^2                         = "<<Nx<<std::endl;
+      std::cout << " | (MdagM^-1/2)^2  noise |^2         = "<<Nz<<std::endl;
+      std::cout << " | MdagM (MdagM^-1/2)^2  noise |^2   = "<<Ny<<std::endl;
+      std::cout << " | noise - MdagM (MdagM^-1/2)^2  noise |^2  = "<<Nd<<std::endl;
+      std::cout << " | noise - MdagM (MdagM^-1/2)^2  noise|/|noise| = " << std::sqrt(Nd/Nx) << std::endl;
       std::cout << "************************* "<<std::endl;
       assert( (std::sqrt(Nd/Nx)<tol) && " InverseSqrtBoundsCheck ");
     }
 
+    /* For a HermOp = M^dag M, check the approximation of  HermOp^{-1/inv_pow}
+       by computing   |X -    HermOp * [ Hermop^{-1/inv_pow} ]^{inv_pow} X|  < tol  
+       for noise X (aka GaussNoise).
+       ApproxNegPow should be the rational approximation for   X^{-1/inv_pow}
+    */
+    template<class Field> void InversePowerBoundsCheck(int inv_pow,
+						       int MaxIter,double tol,
+						       LinearOperatorBase<Field> &HermOp,
+						       Field &GaussNoise,
+						       MultiShiftFunction &ApproxNegPow) 
+    {
+      GridBase *FermionGrid = GaussNoise.Grid();
+
+      Field X(FermionGrid);
+      Field Y(FermionGrid);
+      Field Z(FermionGrid);
+
+      Field tmp1(FermionGrid), tmp2(FermionGrid);
+
+      X=GaussNoise;
+      RealD Nx = norm2(X);
+
+      ConjugateGradientMultiShift<Field> msCG(MaxIter,ApproxNegPow);
+
+      tmp1 = X;
+      
+      Field* in = &tmp1;
+      Field* out = &tmp2;
+      for(int i=0;i<inv_pow;i++){ //apply  [ Hermop^{-1/inv_pow}  ]^{inv_pow} X =   HermOp^{-1} X
+	msCG(HermOp, *in, *out); //backwards conventions!
+	if(i!=inv_pow-1) std::swap(in, out);
+      }
+      Z = *out;
+
+      RealD Nz = norm2(Z);
+
+      HermOp.HermOp(Z,Y);
+      RealD Ny = norm2(Y);
+
+      X=X-Y;
+      RealD Nd = norm2(X);
+      std::cout << "************************* "<<std::endl;
+      std::cout << " | noise |^2                         = "<<Nx<<std::endl;
+      std::cout << " | (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2        = "<<Nz<<std::endl;
+      std::cout << " | MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2   = "<<Ny<<std::endl;
+      std::cout << " | noise - MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |^2  = "<<Nd<<std::endl;
+      std::cout << " | noise - MdagM (MdagM^-1/" << inv_pow << ")^" << inv_pow << " noise |/| noise |  = "<<std::sqrt(Nd/Nx)<<std::endl;
+      std::cout << "************************* "<<std::endl;
+      assert( (std::sqrt(Nd/Nx)<tol) && " InversePowerBoundsCheck ");
+    }
+
+
 NAMESPACE_END(Grid);
 
diff --git a/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h
new file mode 100644
index 00000000..2b08cf49
--- /dev/null
+++ b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h
@@ -0,0 +1,372 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h
+
+    Copyright (C) 2015
+
+    Author: Christopher Kelly <ckelly@bnl.gov>
+    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H
+#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_H
+
+NAMESPACE_BEGIN(Grid);
+
+    /////////////////////////////////////////////////////////
+    // Generic rational approximation for ratios of operators
+    /////////////////////////////////////////////////////////
+
+    /* S_f = -log( det(  [M^dag M]/[V^dag V] )^{1/inv_pow}  )
+           = chi^dag ( [M^dag M]/[V^dag V] )^{-1/inv_pow} chi\
+	   = chi^dag ( [V^dag V]^{-1/2} [M^dag M] [V^dag V]^{-1/2} )^{-1/inv_pow} chi\
+	   = chi^dag [V^dag V]^{1/(2*inv_pow)} [M^dag M]^{-1/inv_pow} [V^dag V]^{1/(2*inv_pow)} chi\
+
+	   S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
+    
+       BIG WARNING:	   
+       Here V^dag V is referred to in this code as the "numerator" operator and M^dag M is the *denominator* operator.
+       this refers to their position in the pseudofermion action, which is the *inverse* of what appears in the determinant
+       Thus for DWF the numerator operator is the Pauli-Villars operator
+
+       Here P/Q \sim R_{1/(2*inv_pow)}  ~ (V^dagV)^{1/(2*inv_pow)}  
+       Here N/D \sim R_{-1/inv_pow} ~ (M^dagM)^{-1/inv_pow}  
+    */
+      
+    template<class Impl>
+    class GeneralEvenOddRatioRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
+    public:
+
+      INHERIT_IMPL_TYPES(Impl);
+
+      typedef RationalActionParams Params;
+      Params param;
+
+      //For action evaluation
+      MultiShiftFunction ApproxPowerAction   ;  //rational approx for X^{1/inv_pow}
+      MultiShiftFunction ApproxNegPowerAction;  //rational approx for X^{-1/inv_pow}
+      MultiShiftFunction ApproxHalfPowerAction;   //rational approx for X^{1/(2*inv_pow)}
+      MultiShiftFunction ApproxNegHalfPowerAction; //rational approx for X^{-1/(2*inv_pow)}
+
+      //For the MD integration
+      MultiShiftFunction ApproxPowerMD   ;  //rational approx for X^{1/inv_pow}
+      MultiShiftFunction ApproxNegPowerMD;  //rational approx for X^{-1/inv_pow}
+      MultiShiftFunction ApproxHalfPowerMD;   //rational approx for X^{1/(2*inv_pow)}
+      MultiShiftFunction ApproxNegHalfPowerMD; //rational approx for X^{-1/(2*inv_pow)}
+
+    private:
+     
+      FermionOperator<Impl> & NumOp;// the basic operator
+      FermionOperator<Impl> & DenOp;// the basic operator
+      FermionField PhiEven; // the pseudo fermion field for this trajectory
+      FermionField PhiOdd; // the pseudo fermion field for this trajectory
+
+      //Generate the approximation to x^{1/inv_pow} (->approx)   and x^{-1/inv_pow} (-> approx_inv)  by an approx_degree degree rational approximation
+      //CG_tolerance is used to issue a warning if the approximation error is larger than the tolerance of the CG and is otherwise just stored in the MultiShiftFunction for use by the multi-shift
+      static void generateApprox(MultiShiftFunction &approx, MultiShiftFunction &approx_inv, int inv_pow, int approx_degree, double CG_tolerance, AlgRemez &remez){
+	std::cout<<GridLogMessage << "Generating degree "<< approx_degree<<" approximation for x^(1/" << inv_pow << ")"<<std::endl;
+	double error = remez.generateApprox(approx_degree,1,inv_pow);	
+	if(error > CG_tolerance)
+	  std::cout<<GridLogMessage << "WARNING: Remez approximation has a larger error " << error << " than the CG tolerance " << CG_tolerance << "! Try increasing the number of poles" << std::endl;
+	
+	approx.Init(remez, CG_tolerance,false);
+	approx_inv.Init(remez, CG_tolerance,true);
+      }
+
+
+    protected:
+      static constexpr bool Numerator = true;
+      static constexpr bool Denominator = false;
+
+      //Allow derived classes to override the multishift CG
+      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, FermionField &out){
+	SchurDifferentiableOperator<Impl> schurOp(numerator ? NumOp : DenOp);
+	ConjugateGradientMultiShift<FermionField> msCG(MaxIter, approx);
+	msCG(schurOp,in, out);
+      }
+      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionField &in, std::vector<FermionField> &out_elems, FermionField &out){
+	SchurDifferentiableOperator<Impl> schurOp(numerator ? NumOp : DenOp);
+	ConjugateGradientMultiShift<FermionField> msCG(MaxIter, approx);
+	msCG(schurOp,in, out_elems, out);
+      }
+      //Allow derived classes to override the gauge import
+      virtual void ImportGauge(const GaugeField &U){
+	NumOp.ImportGauge(U);
+	DenOp.ImportGauge(U);
+      }
+      
+    public:
+
+      GeneralEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
+						     FermionOperator<Impl>  &_DenOp, 
+						     const Params & p
+						     ) : 
+	NumOp(_NumOp), 
+	DenOp(_DenOp), 
+	PhiOdd (_NumOp.FermionRedBlackGrid()),
+	PhiEven(_NumOp.FermionRedBlackGrid()),
+	param(p) 
+      {
+	std::cout<<GridLogMessage << action_name() << " initialize: starting" << std::endl;
+	AlgRemez remez(param.lo,param.hi,param.precision);
+
+	//Generate approximations for action eval
+	generateApprox(ApproxPowerAction, ApproxNegPowerAction, param.inv_pow, param.action_degree, param.action_tolerance, remez);
+	generateApprox(ApproxHalfPowerAction, ApproxNegHalfPowerAction, 2*param.inv_pow, param.action_degree, param.action_tolerance, remez);
+
+	//Generate approximations for MD
+	if(param.md_degree != param.action_degree){ //note the CG tolerance is unrelated to the stopping condition of the Remez algorithm
+	  generateApprox(ApproxPowerMD, ApproxNegPowerMD, param.inv_pow, param.md_degree, param.md_tolerance, remez);
+	  generateApprox(ApproxHalfPowerMD, ApproxNegHalfPowerMD, 2*param.inv_pow, param.md_degree, param.md_tolerance, remez);
+	}else{
+	  std::cout<<GridLogMessage << "Using same rational approximations for MD as for action evaluation" << std::endl;
+	  ApproxPowerMD = ApproxPowerAction; 
+	  ApproxNegPowerMD = ApproxNegPowerAction;
+	  for(int i=0;i<ApproxPowerMD.tolerances.size();i++)
+	    ApproxNegPowerMD.tolerances[i] = ApproxPowerMD.tolerances[i] = param.md_tolerance; //used for multishift
+
+	  ApproxHalfPowerMD = ApproxHalfPowerAction;
+	  ApproxNegHalfPowerMD = ApproxNegHalfPowerAction;
+	  for(int i=0;i<ApproxPowerMD.tolerances.size();i++)
+	    ApproxNegHalfPowerMD.tolerances[i] = ApproxHalfPowerMD.tolerances[i] = param.md_tolerance;
+	}
+
+	std::cout<<GridLogMessage << action_name() << " initialize: complete" << std::endl;
+      };
+
+      virtual std::string action_name(){return "GeneralEvenOddRatioRationalPseudoFermionAction";}
+
+      virtual std::string LogParameters(){
+	std::stringstream sstream;
+	sstream << GridLogMessage << "["<<action_name()<<"] Power              : 1/" << param.inv_pow <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Low                :" << param.lo <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] High               :" << param.hi <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Max iterations     :" << param.MaxIter <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Tolerance (Action) :" << param.action_tolerance <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Degree (Action)    :" << param.action_degree <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Tolerance (MD)     :" << param.md_tolerance <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Degree (MD)        :" << param.md_degree <<  std::endl;
+	sstream << GridLogMessage << "["<<action_name()<<"] Precision          :" << param.precision <<  std::endl;
+	return sstream.str();
+      }
+
+      //Access the fermion field
+      const FermionField &getPhiOdd() const{ return PhiOdd; }
+      
+      virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
+	std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
+	FermionField eta(NumOp.FermionGrid());	
+
+	// P(eta) \propto e^{- eta^dag eta}
+	//	
+	// The gaussian function draws from  P(x) \propto e^{- x^2 / 2 }    [i.e. sigma=1]
+	// Thus eta = x/sqrt{2} = x * sqrt(1/2)
+	RealD scale = std::sqrt(0.5);
+	gaussian(pRNG,eta);	eta=eta*scale;
+
+	refresh(U,eta);
+      }
+
+      //Allow for manual specification of random field for testing
+      void refresh(const GaugeField &U, const FermionField &eta) {
+
+	// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
+	//
+	// P(phi) = e^{- phi^dag (VdagV)^1/(2*inv_pow) (MdagM)^-1/inv_pow (VdagV)^1/(2*inv_pow) phi}
+	//        = e^{- phi^dag  (VdagV)^1/(2*inv_pow) (MdagM)^-1/(2*inv_pow) (MdagM)^-1/(2*inv_pow)  (VdagV)^1/(2*inv_pow) phi}
+	//
+	// Phi =  (VdagV)^-1/(2*inv_pow) Mdag^{1/(2*inv_pow)} eta 
+	
+	std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
+
+	FermionField etaOdd (NumOp.FermionRedBlackGrid());
+	FermionField etaEven(NumOp.FermionRedBlackGrid());
+	FermionField     tmp(NumOp.FermionRedBlackGrid());
+
+	pickCheckerboard(Even,etaEven,eta);
+	pickCheckerboard(Odd,etaOdd,eta);
+
+	ImportGauge(U);
+
+	// MdagM^1/(2*inv_pow) eta
+	std::cout<<GridLogMessage << action_name() << " refresh: doing (M^dag M)^{1/" << 2*param.inv_pow << "} eta" << std::endl;
+	multiShiftInverse(Denominator, ApproxHalfPowerAction, param.MaxIter, etaOdd, tmp);
+
+	// VdagV^-1/(2*inv_pow) MdagM^1/(2*inv_pow) eta
+	std::cout<<GridLogMessage << action_name() << " refresh: doing (V^dag V)^{-1/" << 2*param.inv_pow << "} ( (M^dag M)^{1/" << 2*param.inv_pow << "} eta)" << std::endl;
+	multiShiftInverse(Numerator, ApproxNegHalfPowerAction, param.MaxIter, tmp, PhiOdd);
+		
+	assert(NumOp.ConstEE() == 1);
+	assert(DenOp.ConstEE() == 1);
+	PhiEven = Zero();
+	std::cout<<GridLogMessage << action_name() << " refresh: starting" << std::endl;
+      };
+
+      //////////////////////////////////////////////////////
+      // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
+      //////////////////////////////////////////////////////
+      virtual RealD S(const GaugeField &U) {
+	std::cout<<GridLogMessage << action_name() << " compute action: starting" << std::endl;
+	ImportGauge(U);
+
+	FermionField X(NumOp.FermionRedBlackGrid());
+	FermionField Y(NumOp.FermionRedBlackGrid());
+
+	// VdagV^1/(2*inv_pow) Phi
+	std::cout<<GridLogMessage << action_name() << " compute action: doing (V^dag V)^{1/" << 2*param.inv_pow << "} Phi" << std::endl;
+	multiShiftInverse(Numerator, ApproxHalfPowerAction, param.MaxIter, PhiOdd,X);
+
+	// MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
+	std::cout<<GridLogMessage << action_name() << " compute action: doing (M^dag M)^{-1/" << 2*param.inv_pow << "} ( (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
+	multiShiftInverse(Denominator, ApproxNegHalfPowerAction, param.MaxIter, X,Y);
+
+	// Randomly apply rational bounds checks.
+	int rcheck = rand();
+	auto grid = NumOp.FermionGrid();
+        auto r=rand();
+        grid->Broadcast(0,r);
+
+	if ( param.BoundsCheckFreq != 0 && (r % param.BoundsCheckFreq)==0 ) { 
+	  std::cout<<GridLogMessage << action_name() << " compute action: doing bounds check" << std::endl;
+	  FermionField gauss(NumOp.FermionRedBlackGrid());
+	  gauss = PhiOdd;
+	  SchurDifferentiableOperator<Impl> MdagM(DenOp);
+	  std::cout<<GridLogMessage << action_name() << " compute action: checking high bounds" << std::endl;
+	  HighBoundCheck(MdagM,gauss,param.hi);
+	  std::cout<<GridLogMessage << action_name() << " compute action: full approximation" << std::endl;
+	  InversePowerBoundsCheck(param.inv_pow,param.MaxIter,param.action_tolerance*100,MdagM,gauss,ApproxNegPowerAction);
+	  std::cout<<GridLogMessage << action_name() << " compute action: bounds check complete" << std::endl;
+	}
+
+	//  Phidag VdagV^1/(2*inv_pow) MdagM^-1/(2*inv_pow)  MdagM^-1/(2*inv_pow) VdagV^1/(2*inv_pow) Phi
+	RealD action = norm2(Y);
+	std::cout<<GridLogMessage << action_name() << " compute action: complete" << std::endl;
+
+	return action;
+      };
+
+      // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
+      //
+      // Here, M is some 5D operator and V is the Pauli-Villars field
+      // N and D makeup the rat. poly of the M term and P and & makeup the rat.poly of the denom term
+      //
+      // Need  
+      // dS_f/dU =  chi^dag d[P/Q]  N/D   P/Q  chi 
+      //         +  chi^dag   P/Q d[N/D]  P/Q  chi 
+      //         +  chi^dag   P/Q   N/D d[P/Q] chi 
+      //
+      // P/Q is expressed as partial fraction expansion: 
+      // 
+      //           a0 + \sum_k ak/(V^dagV + bk) 
+      //  
+      // d[P/Q] is then  
+      //
+      //          \sum_k -ak [V^dagV+bk]^{-1}  [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1} 
+      //  
+      // and similar for N/D. 
+      // 
+      // Need   
+      //       MpvPhi_k   = [Vdag V + bk]^{-1} chi  
+      //       MpvPhi     = {a0 +  \sum_k ak [Vdag V + bk]^{-1} }chi   
+      //   
+      //       MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi  
+      //       MfMpvPhi   = {a0 +  \sum_k ak [Mdag M + bk]^{-1} } MpvPhi
+      // 
+      //       MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi   
+      //  
+
+      virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
+	std::cout<<GridLogMessage << action_name() << " deriv: starting" << std::endl;
+	const int n_f  = ApproxNegPowerMD.poles.size();
+	const int n_pv = ApproxHalfPowerMD.poles.size();
+
+	std::vector<FermionField> MpvPhi_k     (n_pv,NumOp.FermionRedBlackGrid());
+	std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
+	std::vector<FermionField> MfMpvPhi_k   (n_f ,NumOp.FermionRedBlackGrid());
+
+	FermionField      MpvPhi(NumOp.FermionRedBlackGrid());
+	FermionField    MfMpvPhi(NumOp.FermionRedBlackGrid());
+	FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid());
+	FermionField           Y(NumOp.FermionRedBlackGrid());
+
+	GaugeField   tmp(NumOp.GaugeGrid());
+
+	ImportGauge(U);
+
+	std::cout<<GridLogMessage << action_name() << " deriv: doing (V^dag V)^{1/" << 2*param.inv_pow << "} Phi" << std::endl;
+	multiShiftInverse(Numerator, ApproxHalfPowerMD, param.MaxIter, PhiOdd,MpvPhi_k,MpvPhi);
+
+	std::cout<<GridLogMessage << action_name() << " deriv: doing (M^dag M)^{-1/" << param.inv_pow << "} ( (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
+	multiShiftInverse(Denominator, ApproxNegPowerMD, param.MaxIter, MpvPhi,MfMpvPhi_k,MfMpvPhi);
+
+	std::cout<<GridLogMessage << action_name() << " deriv: doing (V^dag V)^{1/" << 2*param.inv_pow << "} ( (M^dag M)^{-1/" << param.inv_pow << "} (V^dag V)^{1/" << 2*param.inv_pow << "} Phi)" << std::endl;
+	multiShiftInverse(Numerator, ApproxHalfPowerMD, param.MaxIter, MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi);
+		
+
+	SchurDifferentiableOperator<Impl> MdagM(DenOp);
+	SchurDifferentiableOperator<Impl> VdagV(NumOp);
+
+
+	RealD ak;
+
+	dSdU = Zero();
+
+	// With these building blocks  
+	//  
+	//       dS/dU = 
+	//                 \sum_k -ak MfMpvPhi_k^dag      [ dM^dag M + M^dag dM ] MfMpvPhi_k         (1)
+	//             +   \sum_k -ak MpvMfMpvPhi_k^\dag  [ dV^dag V + V^dag dV ] MpvPhi_k           (2)
+	//                        -ak MpvPhi_k^dag        [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k      (3)
+
+	//(1)	
+	std::cout<<GridLogMessage << action_name() << " deriv: doing dS/dU part (1)" << std::endl;
+	for(int k=0;k<n_f;k++){
+	  ak = ApproxNegPowerMD.residues[k];
+	  MdagM.Mpc(MfMpvPhi_k[k],Y);
+	  MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y );  dSdU=dSdU+ak*tmp;
+	  MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] );  dSdU=dSdU+ak*tmp;
+	}
+	
+	//(2)
+	//(3)
+	std::cout<<GridLogMessage << action_name() << " deriv: doing dS/dU part (2)+(3)" << std::endl;
+	for(int k=0;k<n_pv;k++){
+
+          ak = ApproxHalfPowerMD.residues[k];
+	  
+	  VdagV.Mpc(MpvPhi_k[k],Y);
+	  VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
+	  VdagV.MpcDeriv   (tmp,Y,MpvMfMpvPhi_k[k]);  dSdU=dSdU+ak*tmp;     
+	  
+	  VdagV.Mpc(MpvMfMpvPhi_k[k],Y);                // V as we take Ydag 
+	  VdagV.MpcDeriv   (tmp,Y, MpvPhi_k[k]); dSdU=dSdU+ak*tmp;
+	  VdagV.MpcDagDeriv(tmp,MpvPhi_k[k], Y); dSdU=dSdU+ak*tmp;
+
+	}
+
+	//dSdU = Ta(dSdU);
+	std::cout<<GridLogMessage << action_name() << " deriv: complete" << std::endl;
+      };
+    };
+
+NAMESPACE_END(Grid);
+
+#endif
diff --git a/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h
new file mode 100644
index 00000000..d154fda5
--- /dev/null
+++ b/Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h
@@ -0,0 +1,93 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h
+
+    Copyright (C) 2015
+
+    Author: Christopher Kelly <ckelly@bnl.gov>
+    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
+#define QCD_PSEUDOFERMION_GENERAL_EVEN_ODD_RATIONAL_RATIO_MIXED_PREC_H
+
+NAMESPACE_BEGIN(Grid);
+
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Generic rational approximation for ratios of operators utilizing the mixed precision multishift algorithm
+    // cf. GeneralEvenOddRational.h for details
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      
+    template<class ImplD, class ImplF>
+    class GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<ImplD> {
+    private:
+      typedef typename ImplD::FermionField FermionFieldD;
+      typedef typename ImplF::FermionField FermionFieldF;
+
+      FermionOperator<ImplD> & NumOpD;
+      FermionOperator<ImplD> & DenOpD;
+     
+      FermionOperator<ImplF> & NumOpF;
+      FermionOperator<ImplF> & DenOpF;
+
+      Integer ReliableUpdateFreq;
+    protected:
+
+      //Allow derived classes to override the multishift CG
+      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
+	SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
+	SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
+
+	ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
+	msCG(schurOpD, in, out);
+      }
+      virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
+	SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
+	SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
+
+	ConjugateGradientMultiShiftMixedPrec<FermionFieldD, FermionFieldF> msCG(MaxIter, approx, NumOpF.FermionRedBlackGrid(), schurOpF, ReliableUpdateFreq);
+	msCG(schurOpD, in, out_elems, out);
+      }
+      //Allow derived classes to override the gauge import
+      virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
+	typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
+	precisionChange(Uf, Ud);
+	
+	NumOpD.ImportGauge(Ud);
+	DenOpD.ImportGauge(Ud);
+
+	NumOpF.ImportGauge(Uf);
+	DenOpF.ImportGauge(Uf);
+      }
+      
+    public:
+      GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<ImplD>  &_NumOpD, FermionOperator<ImplD>  &_DenOpD, 
+							      FermionOperator<ImplF>  &_NumOpF, FermionOperator<ImplF>  &_DenOpF, 
+							      const RationalActionParams & p, Integer _ReliableUpdateFreq
+							      ) : GeneralEvenOddRatioRationalPseudoFermionAction<ImplD>(_NumOpD, _DenOpD, p),
+								  ReliableUpdateFreq(_ReliableUpdateFreq), NumOpD(_NumOpD), DenOpD(_DenOpD), NumOpF(_NumOpF), DenOpF(_DenOpF){}
+      
+      virtual std::string action_name(){return "GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction";}
+    };
+
+NAMESPACE_END(Grid);
+
+#endif
diff --git a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h
index e968b8e4..1b36ae0f 100644
--- a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h
+++ b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h
@@ -40,249 +40,31 @@ NAMESPACE_BEGIN(Grid);
     // Here N/D \sim R_{-1/2} ~ (M^dagM)^{-1/2}  
   
     template<class Impl>
-    class OneFlavourEvenOddRatioRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
+    class OneFlavourEvenOddRatioRationalPseudoFermionAction : public GeneralEvenOddRatioRationalPseudoFermionAction<Impl> {
     public:
-
-      INHERIT_IMPL_TYPES(Impl);
-
       typedef OneFlavourRationalParams Params;
-      Params param;
-
-      MultiShiftFunction PowerHalf   ;
-      MultiShiftFunction PowerNegHalf;
-      MultiShiftFunction PowerQuarter;
-      MultiShiftFunction PowerNegQuarter;
-
     private:
-     
-      FermionOperator<Impl> & NumOp;// the basic operator
-      FermionOperator<Impl> & DenOp;// the basic operator
-      FermionField PhiEven; // the pseudo fermion field for this trajectory
-      FermionField PhiOdd; // the pseudo fermion field for this trajectory
+      static RationalActionParams transcribe(const Params &in){
+	RationalActionParams out;
+	out.inv_pow = 2;
+	out.lo = in.lo;
+	out.hi = in.hi;
+	out.MaxIter = in.MaxIter;
+	out.action_tolerance = out.md_tolerance = in.tolerance;
+	out.action_degree = out.md_degree = in.degree;
+	out.precision = in.precision;
+	out.BoundsCheckFreq = in.BoundsCheckFreq;
+	return out;
+      }
 
     public:
-
       OneFlavourEvenOddRatioRationalPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
-					    FermionOperator<Impl>  &_DenOp, 
-					    Params & p
-					    ) : 
-      NumOp(_NumOp), 
-      DenOp(_DenOp), 
-      PhiOdd (_NumOp.FermionRedBlackGrid()),
-      PhiEven(_NumOp.FermionRedBlackGrid()),
-      param(p) 
-      {
-	AlgRemez remez(param.lo,param.hi,param.precision);
+							FermionOperator<Impl>  &_DenOp, 
+							const Params & p
+							) : 
+	GeneralEvenOddRatioRationalPseudoFermionAction<Impl>(_NumOp, _DenOp, transcribe(p)){}
 
-	// MdagM^(+- 1/2)
-	std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl;
-	remez.generateApprox(param.degree,1,2);
-	PowerHalf.Init(remez,param.tolerance,false);
-	PowerNegHalf.Init(remez,param.tolerance,true);
-
-	// MdagM^(+- 1/4)
-	std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
-	remez.generateApprox(param.degree,1,4);
-   	PowerQuarter.Init(remez,param.tolerance,false);
-	PowerNegQuarter.Init(remez,param.tolerance,true);
-      };
-
-      virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
-
-      virtual std::string LogParameters(){
-	std::stringstream sstream;
-	sstream << GridLogMessage << "["<<action_name()<<"] Low            :" << param.lo <<  std::endl;
-	sstream << GridLogMessage << "["<<action_name()<<"] High           :" << param.hi <<  std::endl;
-	sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter <<  std::endl;
-	sstream << GridLogMessage << "["<<action_name()<<"] Tolerance      :" << param.tolerance <<  std::endl;
-	sstream << GridLogMessage << "["<<action_name()<<"] Degree         :" << param.degree <<  std::endl;
-	sstream << GridLogMessage << "["<<action_name()<<"] Precision      :" << param.precision <<  std::endl;
-	return sstream.str();
-      }
-      
-      
-      virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
-
-	// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
-	//
-	// P(phi) = e^{- phi^dag (VdagV)^1/4 (MdagM)^-1/2 (VdagV)^1/4 phi}
-	//        = e^{- phi^dag  (VdagV)^1/4 (MdagM)^-1/4 (MdagM)^-1/4  (VdagV)^1/4 phi}
-	//
-	// Phi =  (VdagV)^-1/4 Mdag^{1/4} eta 
-	//
-	// P(eta) = e^{- eta^dag eta}
-	//
-	// e^{x^2/2 sig^2} => sig^2 = 0.5.
-	// 
-	// So eta should be of width sig = 1/sqrt(2).
-
-	RealD scale = std::sqrt(0.5);
-
-	FermionField eta(NumOp.FermionGrid());
-	FermionField etaOdd (NumOp.FermionRedBlackGrid());
-	FermionField etaEven(NumOp.FermionRedBlackGrid());
-	FermionField     tmp(NumOp.FermionRedBlackGrid());
-
-	gaussian(pRNG,eta);	eta=eta*scale;
-
-	pickCheckerboard(Even,etaEven,eta);
-	pickCheckerboard(Odd,etaOdd,eta);
-
-	NumOp.ImportGauge(U);
-	DenOp.ImportGauge(U);
-
-
-	// MdagM^1/4 eta
-	SchurDifferentiableOperator<Impl> MdagM(DenOp);
-	ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerQuarter);
-	msCG_M(MdagM,etaOdd,tmp);
-
-	// VdagV^-1/4 MdagM^1/4 eta
-	SchurDifferentiableOperator<Impl> VdagV(NumOp);
-	ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerNegQuarter);
-	msCG_V(VdagV,tmp,PhiOdd);
-
-	assert(NumOp.ConstEE() == 1);
-	assert(DenOp.ConstEE() == 1);
-	PhiEven = Zero();
-	
-      };
-
-      //////////////////////////////////////////////////////
-      // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
-      //////////////////////////////////////////////////////
-      virtual RealD S(const GaugeField &U) {
-
-	NumOp.ImportGauge(U);
-	DenOp.ImportGauge(U);
-
-	FermionField X(NumOp.FermionRedBlackGrid());
-	FermionField Y(NumOp.FermionRedBlackGrid());
-
-	// VdagV^1/4 Phi
-	SchurDifferentiableOperator<Impl> VdagV(NumOp);
-	ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
-	msCG_V(VdagV,PhiOdd,X);
-
-	// MdagM^-1/4 VdagV^1/4 Phi
-	SchurDifferentiableOperator<Impl> MdagM(DenOp);
-	ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegQuarter);
-	msCG_M(MdagM,X,Y);
-
-	// Randomly apply rational bounds checks.
-	auto grid = NumOp.FermionGrid();
-        auto r=rand();
-        grid->Broadcast(0,r);
-        if ( (r%param.BoundsCheckFreq)==0 ) { 
-	  FermionField gauss(NumOp.FermionRedBlackGrid());
-	  gauss = PhiOdd;
-	  HighBoundCheck(MdagM,gauss,param.hi);
-	  InverseSqrtBoundsCheck(param.MaxIter,param.tolerance*100,MdagM,gauss,PowerNegHalf);
-	}
-
-	//  Phidag VdagV^1/4 MdagM^-1/4  MdagM^-1/4 VdagV^1/4 Phi
-	RealD action = norm2(Y);
-
-	return action;
-      };
-
-      // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi       
-      //
-      // Here, M is some 5D operator and V is the Pauli-Villars field
-      // N and D makeup the rat. poly of the M term and P and & makeup the rat.poly of the denom term
-      //
-      // Need  
-      // dS_f/dU =  chi^dag d[P/Q]  N/D   P/Q  chi 
-      //         +  chi^dag   P/Q d[N/D]  P/Q  chi 
-      //         +  chi^dag   P/Q   N/D d[P/Q] chi 
-      //
-      // P/Q is expressed as partial fraction expansion: 
-      // 
-      //           a0 + \sum_k ak/(V^dagV + bk) 
-      //  
-      // d[P/Q] is then  
-      //
-      //          \sum_k -ak [V^dagV+bk]^{-1}  [ dV^dag V + V^dag dV ] [V^dag V + bk]^{-1} 
-      //  
-      // and similar for N/D. 
-      // 
-      // Need   
-      //       MpvPhi_k   = [Vdag V + bk]^{-1} chi  
-      //       MpvPhi     = {a0 +  \sum_k ak [Vdag V + bk]^{-1} }chi   
-      //   
-      //       MfMpvPhi_k = [MdagM+bk]^{-1} MpvPhi  
-      //       MfMpvPhi   = {a0 +  \sum_k ak [Mdag M + bk]^{-1} } MpvPhi
-      // 
-      //       MpvMfMpvPhi_k = [Vdag V + bk]^{-1} MfMpvchi   
-      //  
-
-      virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
-
-	const int n_f  = PowerNegHalf.poles.size();
-	const int n_pv = PowerQuarter.poles.size();
-
-	std::vector<FermionField> MpvPhi_k     (n_pv,NumOp.FermionRedBlackGrid());
-	std::vector<FermionField> MpvMfMpvPhi_k(n_pv,NumOp.FermionRedBlackGrid());
-	std::vector<FermionField> MfMpvPhi_k   (n_f ,NumOp.FermionRedBlackGrid());
-
-	FermionField      MpvPhi(NumOp.FermionRedBlackGrid());
-	FermionField    MfMpvPhi(NumOp.FermionRedBlackGrid());
-	FermionField MpvMfMpvPhi(NumOp.FermionRedBlackGrid());
-	FermionField           Y(NumOp.FermionRedBlackGrid());
-
-	GaugeField   tmp(NumOp.GaugeGrid());
-
-	NumOp.ImportGauge(U);
-	DenOp.ImportGauge(U);
-
-	SchurDifferentiableOperator<Impl> VdagV(NumOp);
-	SchurDifferentiableOperator<Impl> MdagM(DenOp);
-
-	ConjugateGradientMultiShift<FermionField> msCG_V(param.MaxIter,PowerQuarter);
-	ConjugateGradientMultiShift<FermionField> msCG_M(param.MaxIter,PowerNegHalf);
-
-	msCG_V(VdagV,PhiOdd,MpvPhi_k,MpvPhi);
-	msCG_M(MdagM,MpvPhi,MfMpvPhi_k,MfMpvPhi);
-	msCG_V(VdagV,MfMpvPhi,MpvMfMpvPhi_k,MpvMfMpvPhi);
-
-	RealD ak;
-
-	dSdU = Zero();
-
-	// With these building blocks  
-	//  
-	//       dS/dU = 
-	//                 \sum_k -ak MfMpvPhi_k^dag      [ dM^dag M + M^dag dM ] MfMpvPhi_k         (1)
-	//             +   \sum_k -ak MpvMfMpvPhi_k^\dag  [ dV^dag V + V^dag dV ] MpvPhi_k           (2)
-	//                        -ak MpvPhi_k^dag        [ dV^dag V + V^dag dV ] MpvMfMpvPhi_k      (3)
-
-	//(1)
-	for(int k=0;k<n_f;k++){
-	  ak = PowerNegHalf.residues[k];
-	  MdagM.Mpc(MfMpvPhi_k[k],Y);
-	  MdagM.MpcDagDeriv(tmp , MfMpvPhi_k[k], Y );  dSdU=dSdU+ak*tmp;
-	  MdagM.MpcDeriv(tmp , Y, MfMpvPhi_k[k] );  dSdU=dSdU+ak*tmp;
-	}
-	
-	//(2)
-	//(3)
-	for(int k=0;k<n_pv;k++){
-
-          ak = PowerQuarter.residues[k];
-	  
-	  VdagV.Mpc(MpvPhi_k[k],Y);
-	  VdagV.MpcDagDeriv(tmp,MpvMfMpvPhi_k[k],Y); dSdU=dSdU+ak*tmp;
-	  VdagV.MpcDeriv   (tmp,Y,MpvMfMpvPhi_k[k]);  dSdU=dSdU+ak*tmp;     
-	  
-	  VdagV.Mpc(MpvMfMpvPhi_k[k],Y);                // V as we take Ydag 
-	  VdagV.MpcDeriv   (tmp,Y, MpvPhi_k[k]); dSdU=dSdU+ak*tmp;
-	  VdagV.MpcDagDeriv(tmp,MpvPhi_k[k], Y); dSdU=dSdU+ak*tmp;
-
-	}
-
-	//dSdU = Ta(dSdU);
-
-      };
+      virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}      
     };
 
 NAMESPACE_END(Grid);
diff --git a/Grid/qcd/action/pseudofermion/PseudoFermion.h b/Grid/qcd/action/pseudofermion/PseudoFermion.h
index 6674bdd6..dfa94043 100644
--- a/Grid/qcd/action/pseudofermion/PseudoFermion.h
+++ b/Grid/qcd/action/pseudofermion/PseudoFermion.h
@@ -40,6 +40,8 @@ directory
 #include <Grid/qcd/action/pseudofermion/OneFlavourRational.h>
 #include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
 #include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
+#include <Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatio.h>
+#include <Grid/qcd/action/pseudofermion/GeneralEvenOddRationalRatioMixedPrec.h>
 #include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
 #include <Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h>
 
diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
index da628c75..1ce9445f 100644
--- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
+++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
@@ -83,16 +83,7 @@ NAMESPACE_BEGIN(Grid);
 	return sstream.str();
       } 
 
-      
       virtual void refresh(const GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) {
-
-        // P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
-        //
-        // NumOp == V
-        // DenOp == M
-        //
-        // Take phi_o = Vpcdag^{-1} Mpcdag eta_o  ; eta_o = Mpcdag^{-1} Vpcdag Phi
-        //
         // P(eta_o) = e^{- eta_o^dag eta_o}
         //
         // e^{x^2/2 sig^2} => sig^2 = 0.5.
@@ -100,12 +91,22 @@ NAMESPACE_BEGIN(Grid);
         RealD scale = std::sqrt(0.5);
 
         FermionField eta    (NumOp.FermionGrid());
+        gaussian(pRNG,eta); eta = eta * scale;
+
+	refresh(U,eta);
+      }
+	
+      void refresh(const GaugeField &U, const FermionField &eta) {
+        // P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
+        //
+        // NumOp == V
+        // DenOp == M
+        //
+        // Take phi_o = Vpcdag^{-1} Mpcdag eta_o  ; eta_o = Mpcdag^{-1} Vpcdag Phi
         FermionField etaOdd (NumOp.FermionRedBlackGrid());
         FermionField etaEven(NumOp.FermionRedBlackGrid());
         FermionField tmp    (NumOp.FermionRedBlackGrid());
 
-        gaussian(pRNG,eta);
-
         pickCheckerboard(Even,etaEven,eta);
         pickCheckerboard(Odd,etaOdd,eta);
 
@@ -125,8 +126,8 @@ NAMESPACE_BEGIN(Grid);
         DenOp.MooeeDag(etaEven,tmp);
         NumOp.MooeeInvDag(tmp,PhiEven);
 
-        PhiOdd =PhiOdd*scale;
-        PhiEven=PhiEven*scale;
+        //PhiOdd =PhiOdd*scale;
+        //PhiEven=PhiEven*scale;
         
       };
 
diff --git a/Grid/qcd/gparity/Gparity.h b/Grid/qcd/gparity/Gparity.h
new file mode 100644
index 00000000..ce1c70eb
--- /dev/null
+++ b/Grid/qcd/gparity/Gparity.h
@@ -0,0 +1,6 @@
+#ifndef GRID_GPARITY_H_
+#define GRID_GPARITY_H_
+
+#include<Grid/qcd/gparity/GparityFlavour.h>
+
+#endif
diff --git a/Grid/qcd/gparity/GparityFlavour.cc b/Grid/qcd/gparity/GparityFlavour.cc
new file mode 100644
index 00000000..4596f96b
--- /dev/null
+++ b/Grid/qcd/gparity/GparityFlavour.cc
@@ -0,0 +1,34 @@
+#include <Grid/Grid.h>
+
+NAMESPACE_BEGIN(Grid);
+
+const std::array<const GparityFlavour, 3> GparityFlavour::sigma_mu = {{
+    GparityFlavour(GparityFlavour::Algebra::SigmaX),
+    GparityFlavour(GparityFlavour::Algebra::SigmaY),
+    GparityFlavour(GparityFlavour::Algebra::SigmaZ)
+    }};
+
+const std::array<const GparityFlavour, 6> GparityFlavour::sigma_all = {{
+  GparityFlavour(GparityFlavour::Algebra::Identity),
+  GparityFlavour(GparityFlavour::Algebra::SigmaX),
+  GparityFlavour(GparityFlavour::Algebra::SigmaY),
+  GparityFlavour(GparityFlavour::Algebra::SigmaZ),
+  GparityFlavour(GparityFlavour::Algebra::ProjPlus),
+  GparityFlavour(GparityFlavour::Algebra::ProjMinus)
+}};
+
+const std::array<const char *, GparityFlavour::nSigma> GparityFlavour::name = {{
+    "SigmaX",
+    "MinusSigmaX",
+    "SigmaY",
+    "MinusSigmaY",
+    "SigmaZ",
+    "MinusSigmaZ",
+    "Identity",
+    "MinusIdentity",
+    "ProjPlus",
+    "MinusProjPlus",
+    "ProjMinus",
+    "MinusProjMinus"}};
+
+NAMESPACE_END(Grid);
diff --git a/Grid/qcd/gparity/GparityFlavour.h b/Grid/qcd/gparity/GparityFlavour.h
new file mode 100644
index 00000000..b2009235
--- /dev/null
+++ b/Grid/qcd/gparity/GparityFlavour.h
@@ -0,0 +1,475 @@
+#ifndef GRID_QCD_GPARITY_FLAVOUR_H
+#define GRID_QCD_GPARITY_FLAVOUR_H
+
+//Support for flavour-matrix operations acting on the G-parity flavour index
+
+#include <array>
+
+NAMESPACE_BEGIN(Grid);
+
+class GparityFlavour {
+  public:
+    GRID_SERIALIZABLE_ENUM(Algebra, undef,
+                           SigmaX, 0,
+			   MinusSigmaX, 1,
+                           SigmaY, 2,
+			   MinusSigmaY, 3,
+                           SigmaZ, 4,
+			   MinusSigmaZ, 5,
+			   Identity, 6,
+			   MinusIdentity, 7,
+			   ProjPlus, 8,
+			   MinusProjPlus, 9,
+			   ProjMinus, 10,
+			   MinusProjMinus, 11
+			   );
+    static constexpr unsigned int nSigma = 12;
+    static const std::array<const char *, nSigma>                name;
+    static const std::array<const GparityFlavour, 3>             sigma_mu;
+    static const std::array<const GparityFlavour, 6>            sigma_all;
+    Algebra                                                      g;
+  public:
+  accelerator GparityFlavour(Algebra initg): g(initg) {}  
+};
+
+
+
+// 0 1  x   vector
+// 1 0
+template<class vtype>
+accelerator_inline void multFlavourSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = rhs(1);
+  ret(1) = rhs(0);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = rhs(1,0);
+  ret(0,1) = rhs(1,1);
+  ret(1,0) = rhs(0,0);
+  ret(1,1) = rhs(0,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = rhs(0,1);
+  ret(0,1) = rhs(0,0);
+  ret(1,0) = rhs(1,1);
+  ret(1,1) = rhs(1,0);
+};
+
+
+template<class vtype>
+accelerator_inline void multFlavourMinusSigmaX(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = -rhs(1);
+  ret(1) = -rhs(0);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -rhs(1,0);
+  ret(0,1) = -rhs(1,1);
+  ret(1,0) = -rhs(0,0);
+  ret(1,1) = -rhs(0,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourMinusSigmaX(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -rhs(0,1);
+  ret(0,1) = -rhs(0,0);
+  ret(1,0) = -rhs(1,1);
+  ret(1,1) = -rhs(1,0);
+};
+
+
+
+
+
+// 0 -i  x   vector
+// i 0
+template<class vtype>
+accelerator_inline void multFlavourSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = timesMinusI(rhs(1));
+  ret(1) = timesI(rhs(0));
+};
+template<class vtype>
+accelerator_inline void lmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = timesMinusI(rhs(1,0));
+  ret(0,1) = timesMinusI(rhs(1,1));
+  ret(1,0) = timesI(rhs(0,0));
+  ret(1,1) = timesI(rhs(0,1));
+};
+template<class vtype>
+accelerator_inline void rmultFlavourSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = timesI(rhs(0,1));
+  ret(0,1) = timesMinusI(rhs(0,0));
+  ret(1,0) = timesI(rhs(1,1));
+  ret(1,1) = timesMinusI(rhs(1,0));
+};
+
+template<class vtype>
+accelerator_inline void multFlavourMinusSigmaY(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = timesI(rhs(1));
+  ret(1) = timesMinusI(rhs(0));
+};
+template<class vtype>
+accelerator_inline void lmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = timesI(rhs(1,0));
+  ret(0,1) = timesI(rhs(1,1));
+  ret(1,0) = timesMinusI(rhs(0,0));
+  ret(1,1) = timesMinusI(rhs(0,1));
+};
+template<class vtype>
+accelerator_inline void rmultFlavourMinusSigmaY(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = timesMinusI(rhs(0,1));
+  ret(0,1) = timesI(rhs(0,0));
+  ret(1,0) = timesMinusI(rhs(1,1));
+  ret(1,1) = timesI(rhs(1,0));
+};
+
+
+
+
+
+// 1 0  x   vector
+// 0 -1
+template<class vtype>
+accelerator_inline void multFlavourSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = rhs(0);
+  ret(1) = -rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = rhs(0,0);
+  ret(0,1) = rhs(0,1);
+  ret(1,0) = -rhs(1,0);
+  ret(1,1) = -rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = rhs(0,0);
+  ret(0,1) = -rhs(0,1);
+  ret(1,0) = rhs(1,0);
+  ret(1,1) = -rhs(1,1);
+};
+
+
+template<class vtype>
+accelerator_inline void multFlavourMinusSigmaZ(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = -rhs(0);
+  ret(1) = rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -rhs(0,0);
+  ret(0,1) = -rhs(0,1);
+  ret(1,0) = rhs(1,0);
+  ret(1,1) = rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourMinusSigmaZ(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -rhs(0,0);
+  ret(0,1) = rhs(0,1);
+  ret(1,0) = -rhs(1,0);
+  ret(1,1) = rhs(1,1);
+};
+
+
+
+
+
+
+template<class vtype>
+accelerator_inline void multFlavourIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = rhs(0);
+  ret(1) = rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = rhs(0,0);
+  ret(0,1) = rhs(0,1);
+  ret(1,0) = rhs(1,0);
+  ret(1,1) = rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = rhs(0,0);
+  ret(0,1) = rhs(0,1);
+  ret(1,0) = rhs(1,0);
+  ret(1,1) = rhs(1,1);
+};
+
+template<class vtype>
+accelerator_inline void multFlavourMinusIdentity(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = -rhs(0);
+  ret(1) = -rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -rhs(0,0);
+  ret(0,1) = -rhs(0,1);
+  ret(1,0) = -rhs(1,0);
+  ret(1,1) = -rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourMinusIdentity(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -rhs(0,0);
+  ret(0,1) = -rhs(0,1);
+  ret(1,0) = -rhs(1,0);
+  ret(1,1) = -rhs(1,1);
+};
+
+
+
+
+
+//G-parity flavour projection 1/2(1+\sigma_2)
+//1 -i
+//i  1
+template<class vtype>
+accelerator_inline void multFlavourProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = 0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
+  ret(1) = 0.5*timesI(rhs(0)) + 0.5*rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
+  ret(0,1) = 0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
+  ret(1,0) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(1,0);
+  ret(1,1) = 0.5*timesI(rhs(0,1)) + 0.5*rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
+  ret(0,1) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(0,1);
+  ret(1,0) = 0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
+  ret(1,1) = 0.5*timesMinusI(rhs(1,0)) + 0.5*rhs(1,1);
+};
+
+
+template<class vtype>
+accelerator_inline void multFlavourMinusProjPlus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = -0.5*rhs(0) + 0.5*timesI(rhs(1));
+  ret(1) = 0.5*timesMinusI(rhs(0)) - 0.5*rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
+  ret(0,1) = -0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
+  ret(1,0) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(1,0);
+  ret(1,1) = 0.5*timesMinusI(rhs(0,1)) - 0.5*rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourMinusProjPlus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
+  ret(0,1) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(0,1);
+  ret(1,0) = -0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
+  ret(1,1) = 0.5*timesI(rhs(1,0)) - 0.5*rhs(1,1);
+};
+
+
+
+
+
+//G-parity flavour projection 1/2(1-\sigma_2)
+//1 i
+//-i  1
+template<class vtype>
+accelerator_inline void multFlavourProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = 0.5*rhs(0) + 0.5*timesI(rhs(1));
+  ret(1) = 0.5*timesMinusI(rhs(0)) + 0.5*rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = 0.5*rhs(0,0) + 0.5*timesI(rhs(1,0));
+  ret(0,1) = 0.5*rhs(0,1) + 0.5*timesI(rhs(1,1));
+  ret(1,0) = 0.5*timesMinusI(rhs(0,0)) + 0.5*rhs(1,0);
+  ret(1,1) = 0.5*timesMinusI(rhs(0,1)) + 0.5*rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = 0.5*rhs(0,0) + 0.5*timesMinusI(rhs(0,1));
+  ret(0,1) = 0.5*timesI(rhs(0,0)) + 0.5*rhs(0,1);
+  ret(1,0) = 0.5*rhs(1,0) + 0.5*timesMinusI(rhs(1,1));
+  ret(1,1) = 0.5*timesI(rhs(1,0)) + 0.5*rhs(1,1);
+};
+
+
+template<class vtype>
+accelerator_inline void multFlavourMinusProjMinus(iVector<vtype, Ngp> &ret, const iVector<vtype, Ngp> &rhs)
+{
+  ret(0) = -0.5*rhs(0) + 0.5*timesMinusI(rhs(1));
+  ret(1) = 0.5*timesI(rhs(0)) - 0.5*rhs(1);
+};
+template<class vtype>
+accelerator_inline void lmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -0.5*rhs(0,0) + 0.5*timesMinusI(rhs(1,0));
+  ret(0,1) = -0.5*rhs(0,1) + 0.5*timesMinusI(rhs(1,1));
+  ret(1,0) = 0.5*timesI(rhs(0,0)) - 0.5*rhs(1,0);
+  ret(1,1) = 0.5*timesI(rhs(0,1)) - 0.5*rhs(1,1);
+};
+template<class vtype>
+accelerator_inline void rmultFlavourMinusProjMinus(iMatrix<vtype, Ngp> &ret, const iMatrix<vtype, Ngp> &rhs)
+{
+  ret(0,0) = -0.5*rhs(0,0) + 0.5*timesI(rhs(0,1));
+  ret(0,1) = 0.5*timesMinusI(rhs(0,0)) - 0.5*rhs(0,1);
+  ret(1,0) = -0.5*rhs(1,0) + 0.5*timesI(rhs(1,1));
+  ret(1,1) = 0.5*timesMinusI(rhs(1,0)) - 0.5*rhs(1,1);
+};
+
+
+
+
+
+
+
+
+
+
+template<class vtype> 
+accelerator_inline auto operator*(const GparityFlavour &G, const iVector<vtype, Ngp> &arg)
+->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Ngp>, GparityFlavourTensorIndex>::value, iVector<vtype, Ngp>>::type
+{
+  iVector<vtype, Ngp> ret;
+
+  switch (G.g) 
+  {
+  case GparityFlavour::Algebra::SigmaX:
+    multFlavourSigmaX(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaX:
+    multFlavourMinusSigmaX(ret, arg); break;
+  case GparityFlavour::Algebra::SigmaY:
+    multFlavourSigmaY(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaY:
+    multFlavourMinusSigmaY(ret, arg); break;
+  case GparityFlavour::Algebra::SigmaZ:
+    multFlavourSigmaZ(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaZ:
+    multFlavourMinusSigmaZ(ret, arg); break;
+  case GparityFlavour::Algebra::Identity:
+    multFlavourIdentity(ret, arg); break;
+  case GparityFlavour::Algebra::MinusIdentity:
+    multFlavourMinusIdentity(ret, arg); break;
+  case GparityFlavour::Algebra::ProjPlus:
+    multFlavourProjPlus(ret, arg); break;
+  case GparityFlavour::Algebra::MinusProjPlus:
+    multFlavourMinusProjPlus(ret, arg); break;
+  case GparityFlavour::Algebra::ProjMinus:
+    multFlavourProjMinus(ret, arg); break;
+  case GparityFlavour::Algebra::MinusProjMinus:
+    multFlavourMinusProjMinus(ret, arg); break;
+  default: assert(0);
+  }
+ 
+  return ret;
+}
+
+template<class vtype> 
+accelerator_inline auto operator*(const GparityFlavour &G, const iMatrix<vtype, Ngp> &arg)
+->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
+{
+  iMatrix<vtype, Ngp> ret;
+
+  switch (G.g) 
+  {
+  case GparityFlavour::Algebra::SigmaX:
+    lmultFlavourSigmaX(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaX:
+    lmultFlavourMinusSigmaX(ret, arg); break;
+  case GparityFlavour::Algebra::SigmaY:
+    lmultFlavourSigmaY(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaY:
+    lmultFlavourMinusSigmaY(ret, arg); break;
+  case GparityFlavour::Algebra::SigmaZ:
+    lmultFlavourSigmaZ(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaZ:
+    lmultFlavourMinusSigmaZ(ret, arg); break;
+  case GparityFlavour::Algebra::Identity:
+    lmultFlavourIdentity(ret, arg); break;
+  case GparityFlavour::Algebra::MinusIdentity:
+    lmultFlavourMinusIdentity(ret, arg); break;
+  case GparityFlavour::Algebra::ProjPlus:
+    lmultFlavourProjPlus(ret, arg); break;
+  case GparityFlavour::Algebra::MinusProjPlus:
+    lmultFlavourMinusProjPlus(ret, arg); break;
+  case GparityFlavour::Algebra::ProjMinus:
+    lmultFlavourProjMinus(ret, arg); break;
+  case GparityFlavour::Algebra::MinusProjMinus:
+    lmultFlavourMinusProjMinus(ret, arg); break;  
+  default: assert(0);
+  }
+  
+  return ret;
+}
+
+template<class vtype> 
+accelerator_inline auto operator*(const iMatrix<vtype, Ngp> &arg, const GparityFlavour &G)
+->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Ngp>, GparityFlavourTensorIndex>::value, iMatrix<vtype, Ngp>>::type
+{
+  iMatrix<vtype, Ngp> ret;
+
+  switch (G.g) 
+  {
+  case GparityFlavour::Algebra::SigmaX:
+    rmultFlavourSigmaX(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaX:
+    rmultFlavourMinusSigmaX(ret, arg); break;
+  case GparityFlavour::Algebra::SigmaY:
+    rmultFlavourSigmaY(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaY:
+    rmultFlavourMinusSigmaY(ret, arg); break;
+  case GparityFlavour::Algebra::SigmaZ:
+    rmultFlavourSigmaZ(ret, arg); break;
+  case GparityFlavour::Algebra::MinusSigmaZ:
+    rmultFlavourMinusSigmaZ(ret, arg); break;
+  case GparityFlavour::Algebra::Identity:
+    rmultFlavourIdentity(ret, arg); break;
+  case GparityFlavour::Algebra::MinusIdentity:
+    rmultFlavourMinusIdentity(ret, arg); break;
+  case GparityFlavour::Algebra::ProjPlus:
+    rmultFlavourProjPlus(ret, arg); break;
+  case GparityFlavour::Algebra::MinusProjPlus:
+    rmultFlavourMinusProjPlus(ret, arg); break;
+  case GparityFlavour::Algebra::ProjMinus:
+    rmultFlavourProjMinus(ret, arg); break;
+  case GparityFlavour::Algebra::MinusProjMinus:
+    rmultFlavourMinusProjMinus(ret, arg); break;
+  default: assert(0);
+  }
+
+  return ret;
+}
+
+NAMESPACE_END(Grid);
+
+#endif // include guard
diff --git a/Grid/qcd/hmc/GenericHMCrunner.h b/Grid/qcd/hmc/GenericHMCrunner.h
index 98e8175a..73381351 100644
--- a/Grid/qcd/hmc/GenericHMCrunner.h
+++ b/Grid/qcd/hmc/GenericHMCrunner.h
@@ -129,18 +129,10 @@ public:
     Runner(S);
   }
 
-  //////////////////////////////////////////////////////////////////
-
-private:
-  template <class SmearingPolicy>
-  void Runner(SmearingPolicy &Smearing) {
-    auto UGrid = Resources.GetCartesian();
-    Resources.AddRNGs();
-    Field U(UGrid);
-
-    // Can move this outside?
-    typedef IntegratorType<SmearingPolicy> TheIntegrator;
-    TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
+  //Use the checkpointer to initialize the RNGs and the gauge field, writing the resulting gauge field into U.
+  //This is called automatically by Run but may be useful elsewhere, e.g. for integrator tuning experiments
+  void initializeGaugeFieldAndRNGs(Field &U){
+    if(!Resources.haveRNGs()) Resources.AddRNGs();
 
     if (Parameters.StartingType == "HotStart") {
       // Hot start
@@ -167,6 +159,22 @@ private:
 	<< "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
       exit(1);
     }
+  }
+
+
+
+  //////////////////////////////////////////////////////////////////
+
+private:
+  template <class SmearingPolicy>
+  void Runner(SmearingPolicy &Smearing) {
+    auto UGrid = Resources.GetCartesian();
+    Field U(UGrid);
+
+    initializeGaugeFieldAndRNGs(U);
+
+    typedef IntegratorType<SmearingPolicy> TheIntegrator;
+    TheIntegrator MDynamics(UGrid, Parameters.MD, TheAction, Smearing);
 
     Smearing.set_Field(U);
 
diff --git a/Grid/qcd/hmc/HMC.h b/Grid/qcd/hmc/HMC.h
index 44674ea5..129e2b24 100644
--- a/Grid/qcd/hmc/HMC.h
+++ b/Grid/qcd/hmc/HMC.h
@@ -115,21 +115,21 @@ private:
 
     random(sRNG, rn_test);
 
-    std::cout << GridLogMessage
+    std::cout << GridLogHMC
               << "--------------------------------------------------\n";
-    std::cout << GridLogMessage << "exp(-dH) = " << prob
+    std::cout << GridLogHMC << "exp(-dH) = " << prob
               << "  Random = " << rn_test << "\n";
-    std::cout << GridLogMessage
+    std::cout << GridLogHMC
               << "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
 
     if ((prob > 1.0) || (rn_test <= prob)) {  // accepted
-      std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n";
-      std::cout << GridLogMessage
+      std::cout << GridLogHMC << "Metropolis_test -- ACCEPTED\n";
+      std::cout << GridLogHMC
                 << "--------------------------------------------------\n";
       return true;
     } else {  // rejected
-      std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n";
-      std::cout << GridLogMessage
+      std::cout << GridLogHMC << "Metropolis_test -- REJECTED\n";
+      std::cout << GridLogHMC
                 << "--------------------------------------------------\n";
       return false;
     }
@@ -145,7 +145,7 @@ private:
 
     std::streamsize current_precision = std::cout.precision();
     std::cout.precision(15);
-    std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n";
+    std::cout << GridLogHMC << "Total H before trajectory = " << H0 << "\n";
     std::cout.precision(current_precision);
 
     TheIntegrator.integrate(U);
@@ -165,7 +165,7 @@ private:
 
 
     std::cout.precision(15);
-    std::cout << GridLogMessage << "Total H after trajectory  = " << H1
+    std::cout << GridLogHMC << "Total H after trajectory  = " << H1
 	      << "  dH = " << H1 - H0 << "\n";
     std::cout.precision(current_precision);
     
@@ -196,9 +196,9 @@ public:
     // Actual updates (evolve a copy Ucopy then copy back eventually)
     unsigned int FinalTrajectory = Params.Trajectories + Params.NoMetropolisUntil + Params.StartTrajectory;
     for (int traj = Params.StartTrajectory; traj < FinalTrajectory; ++traj) {
-      std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n";
+      std::cout << GridLogHMC << "-- # Trajectory = " << traj << "\n";
       if (traj < Params.StartTrajectory + Params.NoMetropolisUntil) {
-      	std::cout << GridLogMessage << "-- Thermalization" << std::endl;
+      	std::cout << GridLogHMC << "-- Thermalization" << std::endl;
       }
       
       double t0=usecond();
@@ -207,10 +207,10 @@ public:
       DeltaH = evolve_hmc_step(Ucopy);
       // Metropolis-Hastings test
       bool accept = true;
-      if (traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
+      if (Params.MetropolisTest && traj >= Params.StartTrajectory + Params.NoMetropolisUntil) {
         accept = metropolis_test(DeltaH);
       } else {
-      	std::cout << GridLogMessage << "Skipping Metropolis test" << std::endl;
+      	std::cout << GridLogHMC << "Skipping Metropolis test" << std::endl;
       }
 
       if (accept)
@@ -219,7 +219,7 @@ public:
      
       
       double t1=usecond();
-      std::cout << GridLogMessage << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
+      std::cout << GridLogHMC << "Total time for trajectory (s): " << (t1-t0)/1e6 << std::endl;
 
 
       for (int obs = 0; obs < Observables.size(); obs++) {
@@ -228,7 +228,7 @@ public:
       	std::cout << GridLogDebug << "Observables pointer " << Observables[obs] << std::endl;
         Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
       }
-      std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
+      std::cout << GridLogHMC << ":::::::::::::::::::::::::::::::::::::::::::" << std::endl;
     }
   }
 
diff --git a/Grid/qcd/hmc/HMCResourceManager.h b/Grid/qcd/hmc/HMCResourceManager.h
index 783e4890..c088cd41 100644
--- a/Grid/qcd/hmc/HMCResourceManager.h
+++ b/Grid/qcd/hmc/HMCResourceManager.h
@@ -226,6 +226,9 @@ public:
   //////////////////////////////////////////////////////
   // Random number generators
   //////////////////////////////////////////////////////
+  
+  //Return true if the RNG objects have been instantiated
+  bool haveRNGs() const{ return have_RNG; }
 
   void AddRNGs(std::string s = "") {
     // Couple the RNGs to the GridModule tagged by s
diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h
index aa28c6c8..a70204fb 100644
--- a/Grid/qcd/hmc/integrators/Integrator.h
+++ b/Grid/qcd/hmc/integrators/Integrator.h
@@ -136,8 +136,14 @@ protected:
       if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
       force = FieldImplementation::projectForce(force); // Ta for gauge fields
       double end_force = usecond();
-      Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites());
-      std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << std::endl;
+
+      Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm.  nb. norm2(latt) = \sum_x norm2(latt[x]) 
+      Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;    
+
+      Real max_force_abs = std::sqrt(maxLocalNorm2(force));
+      Real max_impulse_abs = max_force_abs * ep * HMC_MOMENTUM_DENOMINATOR;    
+
+      std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] Force average: " << force_abs << " Max force: " << max_force_abs << " Time step: " << ep << " Impulse average: " << impulse_abs << " Max impulse: " << max_impulse_abs << std::endl;
       Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;; 
       double end_full = usecond();
       double time_full  = (end_full - start_full) / 1e3;
diff --git a/Grid/sitmo_rng/README b/Grid/random/README
similarity index 100%
rename from Grid/sitmo_rng/README
rename to Grid/random/README
diff --git a/Grid/random/gaussian.h b/Grid/random/gaussian.h
new file mode 100644
index 00000000..e29d00ac
--- /dev/null
+++ b/Grid/random/gaussian.h
@@ -0,0 +1,200 @@
+// -*- C++ -*-
+//===--------------------------- random -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Peter Boyle: Taken from libc++ in Clang/LLVM.
+// Reason is that libstdc++ and clang differ in their return order in the normal_distribution / box mueller type step.
+// standardise on one and call it "gaussian_distribution".
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cmath>
+#include <type_traits>
+#include <initializer_list>
+#include <limits>
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <string>
+#include <istream>
+#include <ostream>
+#include <random>
+
+// normal_distribution -> gaussian distribution
+namespace Grid {
+
+template<class _RealType = double>
+class  gaussian_distribution
+{
+public:
+    // types
+    typedef _RealType result_type;
+
+    class param_type
+    {
+        result_type __mean_;
+        result_type __stddev_;
+    public:
+        typedef gaussian_distribution distribution_type;
+
+        strong_inline
+        explicit param_type(result_type __mean = 0, result_type __stddev = 1)
+            : __mean_(__mean), __stddev_(__stddev) {}
+
+        strong_inline
+        result_type mean() const {return __mean_;}
+        strong_inline
+        result_type stddev() const {return __stddev_;}
+
+        friend strong_inline
+            bool operator==(const param_type& __x, const param_type& __y)
+            {return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_;}
+        friend strong_inline
+            bool operator!=(const param_type& __x, const param_type& __y)
+            {return !(__x == __y);}
+    };
+
+private:
+    param_type __p_;
+    result_type _V_;
+    bool _V_hot_;
+
+public:
+    // constructors and reset functions
+    strong_inline
+    explicit gaussian_distribution(result_type __mean = 0, result_type __stddev = 1)
+        : __p_(param_type(__mean, __stddev)), _V_hot_(false) {}
+    strong_inline
+    explicit gaussian_distribution(const param_type& __p)
+        : __p_(__p), _V_hot_(false) {}
+    strong_inline
+    void reset() {_V_hot_ = false;}
+
+    // generating functions
+    template<class _URNG>
+        strong_inline
+        result_type operator()(_URNG& __g)
+        {return (*this)(__g, __p_);}
+    template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+    // property functions
+    strong_inline
+    result_type mean() const {return __p_.mean();}
+    strong_inline
+    result_type stddev() const {return __p_.stddev();}
+
+    strong_inline
+    param_type param() const {return __p_;}
+    strong_inline
+    void param(const param_type& __p) {__p_ = __p;}
+
+    strong_inline
+    result_type min() const {return -std::numeric_limits<result_type>::infinity();}
+    strong_inline
+    result_type max() const {return std::numeric_limits<result_type>::infinity();}
+
+    friend strong_inline
+        bool operator==(const gaussian_distribution& __x,
+                        const gaussian_distribution& __y)
+        {return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ &&
+                (!__x._V_hot_ || __x._V_ == __y._V_);}
+    friend strong_inline
+        bool operator!=(const gaussian_distribution& __x,
+                        const gaussian_distribution& __y)
+        {return !(__x == __y);}
+
+    template <class _CharT, class _Traits, class _RT>
+    friend
+    std::basic_ostream<_CharT, _Traits>&
+    operator<<(std::basic_ostream<_CharT, _Traits>& __os,
+               const gaussian_distribution<_RT>& __x);
+
+    template <class _CharT, class _Traits, class _RT>
+    friend
+    std::basic_istream<_CharT, _Traits>&
+    operator>>(std::basic_istream<_CharT, _Traits>& __is,
+               gaussian_distribution<_RT>& __x);
+};
+
+template <class _RealType>
+template<class _URNG>
+_RealType
+gaussian_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
+    result_type _Up;
+    if (_V_hot_)
+    {
+        _V_hot_ = false;
+        _Up = _V_;
+    }
+    else
+    {
+        std::uniform_real_distribution<result_type> _Uni(-1, 1);
+        result_type __u;
+        result_type __v;
+        result_type __s;
+        do
+        {
+            __u = _Uni(__g);
+            __v = _Uni(__g);
+            __s = __u * __u + __v * __v;
+        } while (__s > 1 || __s == 0);
+        result_type _Fp = std::sqrt(-2 * std::log(__s) / __s);
+        _V_ = __v * _Fp;
+        _V_hot_ = true;
+        _Up = __u * _Fp;
+    }
+    return _Up * __p.stddev() + __p.mean();
+}
+
+template <class _CharT, class _Traits, class _RT>
+std::basic_ostream<_CharT, _Traits>&
+operator<<(std::basic_ostream<_CharT, _Traits>& __os,
+           const gaussian_distribution<_RT>& __x)
+{
+    auto __save_flags = __os.flags();
+    __os.flags(std::ios_base::dec | std::ios_base::left | std::ios_base::fixed |
+               std::ios_base::scientific);
+    _CharT __sp = __os.widen(' ');
+    __os.fill(__sp);
+    __os << __x.mean() << __sp << __x.stddev() << __sp << __x._V_hot_;
+    if (__x._V_hot_)
+        __os << __sp << __x._V_;
+    __os.flags(__save_flags);
+    return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+std::basic_istream<_CharT, _Traits>&
+operator>>(std::basic_istream<_CharT, _Traits>& __is,
+           gaussian_distribution<_RT>& __x)
+{
+    typedef gaussian_distribution<_RT> _Eng;
+    typedef typename _Eng::result_type result_type;
+    typedef typename _Eng::param_type param_type;
+    auto __save_flags = __is.flags();
+    __is.flags(std::ios_base::dec | std::ios_base::skipws);
+    result_type __mean;
+    result_type __stddev;
+    result_type _Vp = 0;
+    bool _V_hot = false;
+    __is >> __mean >> __stddev >> _V_hot;
+    if (_V_hot)
+        __is >> _Vp;
+    if (!__is.fail())
+    {
+        __x.param(param_type(__mean, __stddev));
+        __x._V_hot_ = _V_hot;
+        __x._V_ = _Vp;
+    }
+    __is.flags(__save_flags);
+    return __is;
+}
+}
diff --git a/Grid/sitmo_rng/sitmo_prng_engine.hpp b/Grid/random/sitmo_prng_engine.hpp
similarity index 100%
rename from Grid/sitmo_rng/sitmo_prng_engine.hpp
rename to Grid/random/sitmo_prng_engine.hpp
diff --git a/Grid/tensors/Tensor_extract_merge.h b/Grid/tensors/Tensor_extract_merge.h
index f1ded209..a563eac7 100644
--- a/Grid/tensors/Tensor_extract_merge.h
+++ b/Grid/tensors/Tensor_extract_merge.h
@@ -208,5 +208,46 @@ void merge(vobj &vec,ExtractPointerArray<sobj> &extracted, int offset)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////////
+//Copy a single lane of a SIMD tensor type from one object to another
+//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
+///////////////////////////////////////////////////////////////////////////////////
+template<class vobjOut, class vobjIn>
+accelerator_inline 
+void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
+{
+  static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
+
+  typedef typename vobjOut::vector_type ovector_type;  
+  typedef typename vobjIn::vector_type ivector_type;  
+  constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
+  constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
+  static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
+
+  typedef typename vobjOut::scalar_type oscalar_type;  
+  typedef typename vobjIn::scalar_type iscalar_type;  
+  typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
+  typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
+
+  typedef oextract_type * opointer;
+  typedef iextract_type * ipointer;
+
+  constexpr int oNsimd=ovector_type::Nsimd();
+  constexpr int iNsimd=ivector_type::Nsimd();
+
+  iscalar_type itmp;
+  oscalar_type otmp;
+
+  opointer __restrict__  op = (opointer)&vecOut;
+  ipointer __restrict__  ip = (ipointer)&vecIn;
+  for(int w=0;w<owords;w++){
+    memcpy( (char*)&itmp, (char*)(ip + lane_in + iNsimd*w), sizeof(iscalar_type) );
+    otmp = itmp; //potential precision change
+    memcpy( (char*)(op + lane_out + oNsimd*w), (char*)&otmp, sizeof(oscalar_type) );
+  }
+}
+
+
 NAMESPACE_END(Grid);
 
diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h
index 56b85c72..6f5b1066 100644
--- a/Grid/threads/Accelerator.h
+++ b/Grid/threads/Accelerator.h
@@ -192,7 +192,7 @@ inline void *acceleratorAllocShared(size_t bytes)
   auto err = cudaMallocManaged((void **)&ptr,bytes);
   if( err != cudaSuccess ) {
     ptr = (void *) NULL;
-    printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
+    printf(" cudaMallocManaged failed for %lu %s \n",bytes,cudaGetErrorString(err));
   }
   return ptr;
 };
@@ -202,7 +202,7 @@ inline void *acceleratorAllocDevice(size_t bytes)
   auto err = cudaMalloc((void **)&ptr,bytes);
   if( err != cudaSuccess ) {
     ptr = (void *) NULL;
-    printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
+    printf(" cudaMalloc failed for %lu %s \n",bytes,cudaGetErrorString(err));
   }
   return ptr;
 };
diff --git a/HMC/DWF2p1fIwasakiGparity.cc b/HMC/DWF2p1fIwasakiGparity.cc
new file mode 100644
index 00000000..c02ada51
--- /dev/null
+++ b/HMC/DWF2p1fIwasakiGparity.cc
@@ -0,0 +1,473 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./HMC/DWF2p1fIwasakiGparity.cc
+
+Copyright (C) 2015-2016
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+//2+1f DWF+I ensemble with G-parity BCs
+//designed to reproduce ensembles in https://arxiv.org/pdf/1908.08640.pdf
+struct RatQuoParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
+				  double, bnd_lo,
+				  double, bnd_hi,
+				  Integer, action_degree,
+				  double, action_tolerance,
+				  Integer, md_degree,
+				  double, md_tolerance,
+				  Integer, reliable_update_freq,
+				  Integer, bnd_check_freq);
+  RatQuoParameters() { 
+    bnd_lo = 1e-2;
+    bnd_hi = 30;
+    action_degree = 10;
+    action_tolerance = 1e-10;
+    md_degree = 10;
+    md_tolerance = 1e-8;
+    bnd_check_freq = 20;
+    reliable_update_freq = 50;
+  }
+
+  void Export(RationalActionParams &into) const{
+    into.lo = bnd_lo;
+    into.hi = bnd_hi;
+    into.action_degree = action_degree;
+    into.action_tolerance = action_tolerance;
+    into.md_degree = md_degree;
+    into.md_tolerance = md_tolerance;
+    into.BoundsCheckFreq = bnd_check_freq;
+  }
+};
+
+
+struct EvolParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
+                                  Integer, StartTrajectory,
+                                  Integer, Trajectories,
+				  Integer, SaveInterval,
+				  Integer, Steps,
+                                  bool, MetropolisTest,
+				  std::string, StartingType,
+				  std::vector<Integer>, GparityDirs,
+				  RatQuoParameters, rat_quo_l,
+				  RatQuoParameters, rat_quo_s);
+
+  EvolParameters() {
+    //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
+    MetropolisTest    = false;
+    StartTrajectory   = 0;
+    Trajectories      = 50;
+    SaveInterval = 5;
+    StartingType      = "ColdStart";
+    GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
+    Steps = 5;
+  }
+};
+
+bool fileExists(const std::string &fn){
+  std::ifstream f(fn);
+  return f.good();
+}
+
+
+
+
+struct LanczosParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
+				  double, alpha,
+				  double, beta,
+				  double, mu,
+				  int, ord,
+				  int, n_stop,
+				  int, n_want,
+				  int, n_use,
+				  double, tolerance);
+
+  LanczosParameters() {
+    alpha = 35;
+    beta = 5;
+    mu = 0;
+    ord = 100;
+    n_stop = 10;
+    n_want = 10;
+    n_use = 15;
+    tolerance = 1e-6;
+  }
+};
+
+
+
+template<typename FermionActionD, typename FermionFieldD>
+void computeEigenvalues(std::string param_file,
+			GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+			FermionActionD &action, GridParallelRNG &rng){
+  
+  LanczosParameters params;
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "LanczosParameters", params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "LanczosParameters", params);
+  }
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  action.ImportGauge(latt);
+
+  SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
+  PlainHermOp<FermionFieldD> hermop_wrap(hermop);
+  //ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
+  assert(params.mu == 0.0);
+
+  Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
+  FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
+
+  std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
+  ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000);
+
+  std::vector<RealD> eval(params.n_use);
+  std::vector<FermionFieldD> evec(params.n_use, rbGrid);
+  int Nconv;
+  IRL.calc(eval, evec, gauss_o, Nconv);
+
+  std::cout << "Eigenvalues:" << std::endl;
+  for(int i=0;i<params.n_want;i++){
+    std::cout << i << " " << eval[i] << std::endl;
+  }
+}
+
+
+//Check the quality of the RHMC approx
+template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
+void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+	       FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
+	       int inv_pow, const std::string &quark_descr){
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  numOp.ImportGauge(latt);
+  denOp.ImportGauge(latt);
+
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
+  SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
+      
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+
+  std::cout << "-------------------------------------------------------------------------------" << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD); 
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+}
+
+
+
+
+
+
+
+
+
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  std::string param_file = "params.xml";
+  bool file_load_check = false;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--param_file"){
+      assert(i!=argc-1);
+      param_file = argv[i+1];
+    }else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
+      file_load_check = true;
+    }
+  }
+
+  //Read the user parameters
+  EvolParameters user_params;
+  
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "Params", user_params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "Params", user_params);
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+  //Check the parameters
+  if(user_params.GparityDirs.size() != Nd-1){
+    std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
+    exit(1);
+  }
+  for(int i=0;i<Nd-1;i++)
+    if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
+      std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
+      exit(1);
+    }
+
+   // Typedefs to simplify notation
+  typedef GparityDomainWallFermionD FermionActionD;
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef GparityDomainWallFermionF FermionActionF;
+  typedef typename FermionActionF::Impl_t FermionImplPolicyF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  IntegratorParameters MD;
+  typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
+  typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
+  MD.name    = std::string("MinimumNorm2");
+  MD.MDsteps = user_params.Steps;
+  MD.trajL   = 1.0;
+
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = user_params.StartTrajectory;
+  HMCparams.Trajectories     = user_params.Trajectories;
+  HMCparams.NoMetropolisUntil= 0;
+  HMCparams.StartingType     = user_params.StartingType;
+  HMCparams.MetropolisTest = user_params.MetropolisTest;
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix    = "ckpoint_rng";
+  CPparams.saveInterval  = user_params.SaveInterval;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+
+  const int Ls      = 16;
+  Real beta         = 2.13;
+  Real light_mass   = 0.01;
+  Real strange_mass = 0.032;
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+
+  //Setup the Grids
+  auto GridPtrD   = TheHMC.Resources.GetCartesian();
+  auto GridRBPtrD = TheHMC.Resources.GetRBCartesian();
+  auto FGridD     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrD);
+  auto FrbGridD   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrD);
+
+  GridCartesian* GridPtrF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
+
+  ConjugateIwasakiGaugeActionD GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(GridPtrD);
+  LatticeGaugeFieldF Uf(GridPtrF);
+ 
+  //Setup the BCs
+  FermionActionD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+
+  std::vector<int> dirs4(Nd);
+  for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
+  dirs4[Nd-1] = 0; //periodic gauge BC in time
+
+  GaugeImplPolicy::setDirections(dirs4); //gauge BC
+
+  //Run optional gauge field checksum checker and exit
+  if(file_load_check){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
+  ActionLevel<HMCWrapper::Field> Level2(8); //gauge (8 increments per step)
+
+
+  /////////////////////////////////////////////////////////////
+  // Light action
+  /////////////////////////////////////////////////////////////
+
+  FermionActionD Numerator_lD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD, light_mass,M5,Params);
+  FermionActionD Denominator_lD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD, pv_mass,M5,Params);
+
+  FermionActionF Numerator_lF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF, light_mass,M5,Params);
+  FermionActionF Denominator_lF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF, pv_mass,M5,Params);
+
+  RationalActionParams rat_act_params_l;
+  rat_act_params_l.inv_pow  = 2; // (M^dag M)^{1/2}
+  rat_act_params_l.precision= 60;
+  rat_act_params_l.MaxIter  = 10000;
+  user_params.rat_quo_l.Export(rat_act_params_l);
+  std::cout << GridLogMessage << " Light quark bounds check every " << rat_act_params_l.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+ 
+  MixedPrecRHMC Quotient_l(Denominator_lD, Numerator_lD, Denominator_lF, Numerator_lF, rat_act_params_l, user_params.rat_quo_l.reliable_update_freq);
+  //DoublePrecRHMC Quotient_l(Denominator_lD, Numerator_lD, rat_act_params_l);
+  Level1.push_back(&Quotient_l);
+
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD,strange_mass,M5,Params);
+  FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD, pv_mass,M5,Params);
+
+  FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,strange_mass,M5,Params);
+  FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF, pv_mass,M5,Params);
+
+  RationalActionParams rat_act_params_s;
+  rat_act_params_s.inv_pow  = 4; // (M^dag M)^{1/4}
+  rat_act_params_s.precision= 60;
+  rat_act_params_s.MaxIter  = 10000;
+  user_params.rat_quo_s.Export(rat_act_params_s);
+  std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_l.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); 
+  //DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); 
+  Level1.push_back(&Quotient_s);  
+
+
+  /////////////////////////////////////////////////////////////
+  // Gauge action
+  /////////////////////////////////////////////////////////////
+  Level2.push_back(&GaugeAction);
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  std::cout << GridLogMessage << " Action complete "<< std::endl;
+
+
+  //Action tuning
+  bool tune_rhmc_l=false, tune_rhmc_s=false, eigenrange_l=false, eigenrange_s=false; 
+  std::string lanc_params_l, lanc_params_s;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--tune_rhmc_l") tune_rhmc_l=true;
+    else if(sarg == "--tune_rhmc_s") tune_rhmc_s=true;
+    else if(sarg == "--eigenrange_l"){
+      assert(i < argc-1);
+      eigenrange_l=true;
+      lanc_params_l = argv[i+1];
+    }
+    else if(sarg == "--eigenrange_s"){
+      assert(i < argc-1);
+      eigenrange_s=true;
+      lanc_params_s = argv[i+1];
+    }
+  }
+  if(tune_rhmc_l || tune_rhmc_s || eigenrange_l || eigenrange_s){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    if(eigenrange_l) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_l, FGridD, FrbGridD, Ud, Numerator_lD, TheHMC.Resources.GetParallelRNG());
+    if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_l) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_l)>(FGridD, FrbGridD, Ud, Numerator_lD, Denominator_lD, Quotient_l, TheHMC.Resources.GetParallelRNG(), 2, "light");
+    if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange");
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  //Run the HMC
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.Run();
+
+  std::cout << GridLogMessage << " Done" << std::endl;
+  Grid_finalize();
+  return 0;
+} // main
+
diff --git a/HMC/DWF2p1fIwasakiGparityRHMCdouble.cc b/HMC/DWF2p1fIwasakiGparityRHMCdouble.cc
new file mode 100644
index 00000000..58295d5e
--- /dev/null
+++ b/HMC/DWF2p1fIwasakiGparityRHMCdouble.cc
@@ -0,0 +1,473 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./HMC/DWF2p1fIwasakiGparity.cc
+
+Copyright (C) 2015-2016
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+//2+1f DWF+I ensemble with G-parity BCs
+//designed to reproduce ensembles in https://arxiv.org/pdf/1908.08640.pdf
+struct RatQuoParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
+				  double, bnd_lo,
+				  double, bnd_hi,
+				  Integer, action_degree,
+				  double, action_tolerance,
+				  Integer, md_degree,
+				  double, md_tolerance,
+				  Integer, reliable_update_freq,
+				  Integer, bnd_check_freq);
+  RatQuoParameters() { 
+    bnd_lo = 1e-2;
+    bnd_hi = 30;
+    action_degree = 10;
+    action_tolerance = 1e-10;
+    md_degree = 10;
+    md_tolerance = 1e-8;
+    bnd_check_freq = 20;
+    reliable_update_freq = 50;
+  }
+
+  void Export(RationalActionParams &into) const{
+    into.lo = bnd_lo;
+    into.hi = bnd_hi;
+    into.action_degree = action_degree;
+    into.action_tolerance = action_tolerance;
+    into.md_degree = md_degree;
+    into.md_tolerance = md_tolerance;
+    into.BoundsCheckFreq = bnd_check_freq;
+  }
+};
+
+
+struct EvolParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
+                                  Integer, StartTrajectory,
+                                  Integer, Trajectories,
+				  Integer, SaveInterval,
+				  Integer, Steps,
+                                  bool, MetropolisTest,
+				  std::string, StartingType,
+				  std::vector<Integer>, GparityDirs,
+				  RatQuoParameters, rat_quo_l,
+				  RatQuoParameters, rat_quo_s);
+
+  EvolParameters() {
+    //For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
+    MetropolisTest    = false;
+    StartTrajectory   = 0;
+    Trajectories      = 50;
+    SaveInterval = 5;
+    StartingType      = "ColdStart";
+    GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
+    Steps = 5;
+  }
+};
+
+bool fileExists(const std::string &fn){
+  std::ifstream f(fn);
+  return f.good();
+}
+
+
+
+
+struct LanczosParameters: Serializable {
+  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
+				  double, alpha,
+				  double, beta,
+				  double, mu,
+				  int, ord,
+				  int, n_stop,
+				  int, n_want,
+				  int, n_use,
+				  double, tolerance);
+
+  LanczosParameters() {
+    alpha = 35;
+    beta = 5;
+    mu = 0;
+    ord = 100;
+    n_stop = 10;
+    n_want = 10;
+    n_use = 15;
+    tolerance = 1e-6;
+  }
+};
+
+
+
+template<typename FermionActionD, typename FermionFieldD>
+void computeEigenvalues(std::string param_file,
+			GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+			FermionActionD &action, GridParallelRNG &rng){
+  
+  LanczosParameters params;
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "LanczosParameters", params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "LanczosParameters", params);
+  }
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  action.ImportGauge(latt);
+
+  SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
+  PlainHermOp<FermionFieldD> hermop_wrap(hermop);
+  //ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
+  assert(params.mu == 0.0);
+
+  Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
+  FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
+
+  std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
+  ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000);
+
+  std::vector<RealD> eval(params.n_use);
+  std::vector<FermionFieldD> evec(params.n_use, rbGrid);
+  int Nconv;
+  IRL.calc(eval, evec, gauss_o, Nconv);
+
+  std::cout << "Eigenvalues:" << std::endl;
+  for(int i=0;i<params.n_want;i++){
+    std::cout << i << " " << eval[i] << std::endl;
+  }
+}
+
+
+//Check the quality of the RHMC approx
+template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
+void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt,  //expect lattice to have been initialized to something
+	       FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
+	       int inv_pow, const std::string &quark_descr){
+
+  FermionFieldD gauss_o(rbGrid);
+  FermionFieldD gauss(Grid);
+  gaussian(rng, gauss);
+  pickCheckerboard(Odd, gauss_o, gauss);
+
+  numOp.ImportGauge(latt);
+  denOp.ImportGauge(latt);
+
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
+  SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
+      
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
+  std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+
+  std::cout << "-------------------------------------------------------------------------------" << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD); 
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+  InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
+
+  std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+  InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
+  std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
+}
+
+
+
+
+
+
+
+
+
+int main(int argc, char **argv) {
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  std::string param_file = "params.xml";
+  bool file_load_check = false;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--param_file"){
+      assert(i!=argc-1);
+      param_file = argv[i+1];
+    }else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
+      file_load_check = true;
+    }
+  }
+
+  //Read the user parameters
+  EvolParameters user_params;
+  
+  if(fileExists(param_file)){
+    std::cout << GridLogMessage << " Reading " << param_file << std::endl;
+    Grid::XmlReader rd(param_file);
+    read(rd, "Params", user_params);
+  }else if(!GlobalSharedMemory::WorldRank){
+    std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
+    std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
+    Grid::XmlWriter wr(param_file + ".templ");
+    write(wr, "Params", user_params);
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+  //Check the parameters
+  if(user_params.GparityDirs.size() != Nd-1){
+    std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
+    exit(1);
+  }
+  for(int i=0;i<Nd-1;i++)
+    if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
+      std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
+      exit(1);
+    }
+
+   // Typedefs to simplify notation
+  typedef GparityDomainWallFermionD FermionActionD;
+  typedef typename FermionActionD::Impl_t FermionImplPolicyD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef GparityDomainWallFermionF FermionActionF;
+  typedef typename FermionActionF::Impl_t FermionImplPolicyF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  IntegratorParameters MD;
+  typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
+  typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
+  MD.name    = std::string("MinimumNorm2");
+  MD.MDsteps = user_params.Steps;
+  MD.trajL   = 1.0;
+
+  HMCparameters HMCparams;
+  HMCparams.StartTrajectory  = user_params.StartTrajectory;
+  HMCparams.Trajectories     = user_params.Trajectories;
+  HMCparams.NoMetropolisUntil= 0;
+  HMCparams.StartingType     = user_params.StartingType;
+  HMCparams.MetropolisTest = user_params.MetropolisTest;
+  HMCparams.MD = MD;
+  HMCWrapper TheHMC(HMCparams);
+
+  // Grid from the command line arguments --grid and --mpi
+  TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
+
+  CheckpointerParameters CPparams;
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix    = "ckpoint_rng";
+  CPparams.saveInterval  = user_params.SaveInterval;
+  CPparams.format        = "IEEE64BIG";
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  //Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+
+  const int Ls      = 16;
+  Real beta         = 2.13;
+  Real light_mass   = 0.01;
+  Real strange_mass = 0.032;
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+
+  //Setup the Grids
+  auto GridPtrD   = TheHMC.Resources.GetCartesian();
+  auto GridRBPtrD = TheHMC.Resources.GetRBCartesian();
+  auto FGridD     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrD);
+  auto FrbGridD   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrD);
+
+  GridCartesian* GridPtrF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
+
+  ConjugateIwasakiGaugeActionD GaugeAction(beta);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(GridPtrD);
+  LatticeGaugeFieldF Uf(GridPtrF);
+ 
+  //Setup the BCs
+  FermionActionD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+
+  std::vector<int> dirs4(Nd);
+  for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
+  dirs4[Nd-1] = 0; //periodic gauge BC in time
+
+  GaugeImplPolicy::setDirections(dirs4); //gauge BC
+
+  //Run optional gauge field checksum checker and exit
+  if(file_load_check){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  ////////////////////////////////////
+  // Collect actions
+  ////////////////////////////////////
+  ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
+  ActionLevel<HMCWrapper::Field> Level2(8); //gauge (8 increments per step)
+
+
+  /////////////////////////////////////////////////////////////
+  // Light action
+  /////////////////////////////////////////////////////////////
+
+  FermionActionD Numerator_lD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD, light_mass,M5,Params);
+  FermionActionD Denominator_lD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD, pv_mass,M5,Params);
+
+  FermionActionF Numerator_lF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF, light_mass,M5,Params);
+  FermionActionF Denominator_lF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF, pv_mass,M5,Params);
+
+  RationalActionParams rat_act_params_l;
+  rat_act_params_l.inv_pow  = 2; // (M^dag M)^{1/2}
+  rat_act_params_l.precision= 60;
+  rat_act_params_l.MaxIter  = 10000;
+  user_params.rat_quo_l.Export(rat_act_params_l);
+  std::cout << GridLogMessage << " Light quark bounds check every " << rat_act_params_l.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+ 
+  //MixedPrecRHMC Quotient_l(Denominator_lD, Numerator_lD, Denominator_lF, Numerator_lF, rat_act_params_l, user_params.rat_quo_l.reliable_update_freq);
+  DoublePrecRHMC Quotient_l(Denominator_lD, Numerator_lD, rat_act_params_l);
+  Level1.push_back(&Quotient_l);
+
+
+  ////////////////////////////////////
+  // Strange action
+  ////////////////////////////////////
+  FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD,strange_mass,M5,Params);
+  FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*GridPtrD,*GridRBPtrD, pv_mass,M5,Params);
+
+  FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,strange_mass,M5,Params);
+  FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF, pv_mass,M5,Params);
+
+  RationalActionParams rat_act_params_s;
+  rat_act_params_s.inv_pow  = 4; // (M^dag M)^{1/4}
+  rat_act_params_s.precision= 60;
+  rat_act_params_s.MaxIter  = 10000;
+  user_params.rat_quo_s.Export(rat_act_params_s);
+  std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_l.BoundsCheckFreq << " trajectories (avg)" << std::endl;
+
+  //MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq); 
+  DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s); 
+  Level1.push_back(&Quotient_s);  
+
+
+  /////////////////////////////////////////////////////////////
+  // Gauge action
+  /////////////////////////////////////////////////////////////
+  Level2.push_back(&GaugeAction);
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  std::cout << GridLogMessage << " Action complete "<< std::endl;
+
+
+  //Action tuning
+  bool tune_rhmc_l=false, tune_rhmc_s=false, eigenrange_l=false, eigenrange_s=false; 
+  std::string lanc_params_l, lanc_params_s;
+  for(int i=1;i<argc;i++){
+    std::string sarg(argv[i]);
+    if(sarg == "--tune_rhmc_l") tune_rhmc_l=true;
+    else if(sarg == "--tune_rhmc_s") tune_rhmc_s=true;
+    else if(sarg == "--eigenrange_l"){
+      assert(i < argc-1);
+      eigenrange_l=true;
+      lanc_params_l = argv[i+1];
+    }
+    else if(sarg == "--eigenrange_s"){
+      assert(i < argc-1);
+      eigenrange_s=true;
+      lanc_params_s = argv[i+1];
+    }
+  }
+  if(tune_rhmc_l || tune_rhmc_s || eigenrange_l || eigenrange_s){
+    TheHMC.initializeGaugeFieldAndRNGs(Ud);
+    if(eigenrange_l) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_l, FGridD, FrbGridD, Ud, Numerator_lD, TheHMC.Resources.GetParallelRNG());
+    if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
+    if(tune_rhmc_l) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_l)>(FGridD, FrbGridD, Ud, Numerator_lD, Denominator_lD, Quotient_l, TheHMC.Resources.GetParallelRNG(), 2, "light");
+    if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange");
+
+    std::cout << GridLogMessage << " Done" << std::endl;
+    Grid_finalize();
+    return 0;
+  }
+
+
+  //Run the HMC
+  std::cout << GridLogMessage << " Running the HMC "<< std::endl;
+  TheHMC.Run();
+
+  std::cout << GridLogMessage << " Done" << std::endl;
+  Grid_finalize();
+  return 0;
+} // main
+
diff --git a/scripts/hmc.sh b/scripts/hmc.sh
index c2cd6593..4d7191ff 100755
--- a/scripts/hmc.sh
+++ b/scripts/hmc.sh
@@ -1,19 +1,27 @@
 #!/bin/bash
 
 LOG=$1
-SWEEPS=`grep dH $LOG | wc -l`
-SWEEPS=`expr $SWEEPS - 80`
+SWEEPS=`grep dH.= $LOG | wc -l`
+SWEEPS=`expr $SWEEPS - 100`
 echo
 echo $SWEEPS thermalised sweeps
 echo
-plaq=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10} END { print S/NR} ' `
-plaqe=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$10 ; SS=SS+$10*$10 } END { print sqrt( (SS/NR - S*S/NR/NR)/NR) } ' `
+plaq=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$12} END { print S/NR} ' `
+plaqe=`grep Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$12 ; SS=SS+$12*$12 } END { print sqrt( (SS/NR - S*S/NR/NR)/NR) } ' `
 echo "Plaquette: $plaq (${plaqe})"
 echo
 
-dHv=`grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+$10 ; SS=SS+$10*$10 } END { print sqrt(SS/NR) } ' `
-edH=`grep dH $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$10)} END { print S/NR} '`
-echo "<e-dH>: $edH"
+grep  Plaq $LOG | tail -n $SWEEPS | awk '{ S=S+$12/20; if(NR%20==0){ print NR/20, " ", S; S=0;} } '  > plaq.binned
+
+plaq=`cat plaq.binned  | awk '{ S=S+$2} END { print S/NR} ' `
+plaqe=`cat plaq.binned | awk '{ S=S+$2 ; SS=SS+$2*$2 } END { print sqrt( (SS/NR - S*S/NR/NR)/NR) } ' `
+echo "Binned Plaquette: $plaq (${plaqe})"
+echo
+
+dHv=`grep dH.= $LOG | tail -n $SWEEPS | awk '{ S=S+$16 ; SS=SS+$16*$16 } END { print sqrt(SS/NR) } ' `
+edH=`grep dH.= $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$16)} END { print S/NR} '`
+dedH=`grep dH.= $LOG | tail -n $SWEEPS | awk '{ S=S+exp(-$16); SS=SS+exp(-$16)*exp(-$16)} END { print sqrt( (SS/NR - S*S/NR/NR)/NR) } '`
+echo "<e-dH>: $edH (${dedH})"
 echo "<rms dH>: $dHv"
 
 TRAJ=`grep Acc $LOG | wc -l`
@@ -22,12 +30,13 @@ PACC=`expr  100 \* ${ACC} / ${TRAJ} `
 echo
 echo "Acceptance $PACC %  $ACC / $TRAJ "
 
-grep Plaq $LOG | awk '{ print $10 }' | uniq > plaq.dat
-grep dH $LOG | awk '{ print $10 }' > dH.dat
-echo set yrange [-0.2:1.0] > plot.gnu
+grep Plaq $LOG | awk '{ print $12 }' | uniq > plaq.dat
+grep dH.= $LOG | awk '{ print $16 }' > dH.dat
+echo set yrange [0.58:0.60] > plot.gnu
 echo set terminal 'pdf' >> plot.gnu
+echo "f(x) =0.588" >> plot.gnu
 echo "set output 'plaq.${LOG}.pdf'" >> plot.gnu
-echo "plot 'plaq.dat' w l, 'dH.dat' w l " >> plot.gnu
+echo "plot 'plaq.dat' w l, f(x) " >> plot.gnu
 echo
 gnuplot plot.gnu >& gnu.errs
 open plaq.${LOG}.pdf
diff --git a/tests/core/Test_fft.cc b/tests/core/Test_fft.cc
index 212b1a35..4ae0c23b 100644
--- a/tests/core/Test_fft.cc
+++ b/tests/core/Test_fft.cc
@@ -299,12 +299,12 @@ int main (int argc, char ** argv)
     SpinColourVectorD ferm; gaussian(sRNG,ferm);
     pokeSite(ferm,src,point);
 
-    const int Ls=32;
+    const int Ls=64;
     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
 
-    RealD mass=0.01;
-    RealD M5  =0.8;
+    RealD mass=1.0;
+    RealD M5  =0.99;
     DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);
 
     // Momentum space prop
@@ -353,6 +353,12 @@ int main (int argc, char ** argv)
     std::cout << " Taking difference" <<std::endl;
     std::cout << "Ddwf result4 "<<norm2(result4)<<std::endl;
     std::cout << "Ddwf ref     "<<norm2(ref)<<std::endl;
+    auto twopoint = localInnerProduct(result4,result4);
+    std::vector<TComplex> pion_prop;
+    sliceSum(twopoint,pion_prop,Nd-1);
+    for(int t=0;t<pion_prop.size();t++){
+      std::cout << "Pion_prop["<<t<<"]="<<pion_prop[t]<<std::endl;
+    }
     
     diff = ref - result4;
     std::cout << "result - ref     "<<norm2(diff)<<std::endl;
@@ -383,7 +389,7 @@ int main (int argc, char ** argv)
     GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,&GRID);
     GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,&GRID);
 
-    RealD mass=0.01;
+    RealD mass=1.0;
     RealD M5  =0.8;
 
     OverlapWilsonCayleyTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
diff --git a/tests/core/Test_gparity.cc b/tests/core/Test_gparity.cc
index b2068901..5bf98ba6 100644
--- a/tests/core/Test_gparity.cc
+++ b/tests/core/Test_gparity.cc
@@ -55,13 +55,17 @@ static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIM
 int main (int argc, char ** argv)
 {
   int nu = 0;
-
+  int tbc_aprd = 0; //use antiperiodic BCs in the time direction?
+  
   Grid_init(&argc,&argv);
 
   for(int i=1;i<argc;i++){
     if(std::string(argv[i]) == "--Gparity-dir"){
       std::stringstream ss; ss << argv[i+1]; ss >> nu;
       std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl;
+    }else if(std::string(argv[i]) == "--Tbc-APRD"){
+      tbc_aprd = 1;
+      std::cout << GridLogMessage << "Using antiperiodic BCs in the time direction" << std::endl;
     }
   }
 
@@ -155,13 +159,18 @@ int main (int argc, char ** argv)
 
   //Coordinate grid for reference
   LatticeInteger    xcoor_1f5(FGrid_1f);
-  LatticeCoordinate(xcoor_1f5,1+nu);
+  LatticeCoordinate(xcoor_1f5,1+nu); //note '1+nu'! This is because for 5D fields the s-direction is direction 0
   Replicate(src,src_1f);
   src_1f   = where( xcoor_1f5 >= Integer(L), 2.0*src_1f,src_1f );
 
   RealD mass=0.0;
   RealD M5=1.8;
-  StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS);
+
+  //Standard Dirac op
+  AcceleratorVector<Complex,4> bc_std(Nd, 1.0);
+  if(tbc_aprd) bc_std[Nd-1] = -1.; //antiperiodic time BC
+  StandardDiracOp::ImplParams std_params(bc_std);
+  StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS, std_params);
 
   StandardFermionField    src_o_1f(FrbGrid_1f);
   StandardFermionField result_o_1f(FrbGrid_1f);
@@ -172,9 +181,11 @@ int main (int argc, char ** argv)
   ConjugateGradient<StandardFermionField> CG(1.0e-8,10000);
   CG(HermOpEO,src_o_1f,result_o_1f);
   
-  //  const int nu = 3;
+  //Gparity Dirac op
   std::vector<int> twists(Nd,0);
   twists[nu] = 1;
+  if(tbc_aprd) twists[Nd-1] = 1;
+
   GparityDiracOp::ImplParams params;
   params.twists = twists;
   GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params);
@@ -271,8 +282,11 @@ int main (int argc, char ** argv)
   std::cout << "2f cb "<<result_o_2f.Checkerboard()<<std::endl;
   std::cout << "1f cb "<<result_o_1f.Checkerboard()<<std::endl;
 
-  std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
+  //Compare norms
+  std::cout << " result norms 2f: " <<norm2(result_o_2f)<<" 1f: " <<norm2(result_o_1f)<<std::endl;
 
+
+  //Take the 2f solution and convert into the corresponding 1f solution (odd cb only)
   StandardFermionField    res0o  (FrbGrid_2f); 
   StandardFermionField    res1o  (FrbGrid_2f); 
   StandardFermionField    res0  (FGrid_2f); 
@@ -281,14 +295,15 @@ int main (int argc, char ** argv)
   res0=Zero();
   res1=Zero();
 
-  res0o = PeekIndex<0>(result_o_2f,0);
-  res1o = PeekIndex<0>(result_o_2f,1);
+  res0o = PeekIndex<0>(result_o_2f,0); //flavor 0, odd cb
+  res1o = PeekIndex<0>(result_o_2f,1); //flavor 1, odd cb
 
   std::cout << "res cb "<<res0o.Checkerboard()<<std::endl;
   std::cout << "res cb "<<res1o.Checkerboard()<<std::endl;
 
-  setCheckerboard(res0,res0o);
-  setCheckerboard(res1,res1o);
+  //poke odd onto non-cb field
+  setCheckerboard(res0,res0o); 
+  setCheckerboard(res1,res1o); 
 
   StandardFermionField replica (FGrid_1f);
   StandardFermionField replica0(FGrid_1f);
@@ -296,12 +311,13 @@ int main (int argc, char ** argv)
   Replicate(res0,replica0);
   Replicate(res1,replica1);
 
+  //2nd half of doubled lattice has f=1
   replica = where( xcoor_1f5 >= Integer(L), replica1,replica0 );
 
   replica0 = Zero();
   setCheckerboard(replica0,result_o_1f);
 
-  std::cout << "Norm2 solutions is " <<norm2(replica)<<" "<< norm2(replica0)<<std::endl;
+  std::cout << "Norm2 solutions 1f reconstructed from 2f: " <<norm2(replica)<<" Actual 1f: "<< norm2(replica0)<<std::endl;
 
   replica = replica - replica0;
   
diff --git a/tests/core/Test_gparity_flavour.cc b/tests/core/Test_gparity_flavour.cc
new file mode 100644
index 00000000..5b204e04
--- /dev/null
+++ b/tests/core/Test_gparity_flavour.cc
@@ -0,0 +1,177 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: ./tests/Test_gparity_flavour.cc
+
+Copyright (C) 2015-2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+static constexpr double                      tolerance = 1.0e-6;
+static std::array<GparityFlavourMatrix, GparityFlavour::nSigma> testAlgebra;
+
+void print(const GparityFlavourMatrix &g)
+{
+  for(int i = 0; i < Ngp; i++)
+  {
+    std::cout << GridLogMessage << "(";
+    for(int j=0;j<Ngp;j++){
+      if ( abs( g(i,j)()() ) == 0 ) {
+        std::cout<< " 0";
+      } else if ( abs(g(i,j)()() - Complex(0,1)) == 0){
+        std::cout<< " i";
+      } else if ( abs(g(i,j)()() + Complex(0,1)) == 0){
+        std::cout<< "-i";
+      } else if ( abs(g(i,j)()() - Complex(1,0)) == 0){
+        std::cout<< " 1";
+      } else if ( abs(g(i,j)()() + Complex(1,0)) == 0){
+        std::cout<< "-1";
+      }
+      std::cout<<((j == Ngp-1) ? ")" : "," );
+    }
+    std::cout << std::endl;
+  }
+  std::cout << GridLogMessage << std::endl;
+}
+
+void createTestAlgebra(void)
+{
+  std::array<GparityFlavourMatrix, 3> testg;
+  const Complex             I(0., 1.), mI(0., -1.);
+
+  // 0 1
+  // 1 0
+  testg[0] = Zero();
+  testg[0](0, 1)()() = 1.;
+  testg[0](1, 0)()() = 1.;
+  std::cout << GridLogMessage << "test SigmaX= " << std::endl;
+  print(testg[0]);
+
+  // 0 -i
+  // i  0
+  testg[1] = Zero();
+  testg[1](0, 1)()() = mI;
+  testg[1](1, 0)()() = I;
+  std::cout << GridLogMessage << "test SigmaY= " << std::endl;
+  print(testg[1]);
+
+  // 1  0
+  // 0 -1
+  testg[2] = Zero();
+  testg[2](0, 0)()() = 1.0;
+  testg[2](1, 1)()() = -1.0;
+  std::cout << GridLogMessage << "test SigmaZ= " << std::endl;
+  print(testg[2]);
+
+  
+#define DEFINE_TEST_G(g, exp)\
+testAlgebra[GparityFlavour::Algebra::g]        = exp; \
+testAlgebra[GparityFlavour::Algebra::Minus##g] = -exp;
+  
+  DEFINE_TEST_G(SigmaX      , testg[0]);
+  DEFINE_TEST_G(SigmaY      , testg[1]);
+  DEFINE_TEST_G(SigmaZ      , testg[2]);
+  DEFINE_TEST_G(Identity    , 1.);
+
+  GparityFlavourMatrix pplus;
+  pplus = 1.0;
+  pplus = pplus + testg[1];
+  pplus = pplus * 0.5;
+
+  DEFINE_TEST_G(ProjPlus    , pplus);
+  
+  GparityFlavourMatrix pminus;
+  pminus = 1.0;
+  pminus = pminus - testg[1];
+  pminus = pminus * 0.5;
+
+  DEFINE_TEST_G(ProjMinus    , pminus);
+
+#undef DEFINE_TEST_G
+}
+
+template <typename Expr>
+void test(const Expr &a, const Expr &b)
+{
+  if (norm2(a - b) < tolerance)
+  {
+    std::cout << "[OK] ";
+  }
+  else
+  {
+    std::cout << "[fail]" << std::endl;
+    std::cout << GridLogError << "a= " << a << std::endl;
+    std::cout << GridLogError << "is different (tolerance= " << tolerance << ") from " << std::endl;
+    std::cout << GridLogError << "b= " << b << std::endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
+void checkSigma(const GparityFlavour::Algebra a, GridSerialRNG &rng)
+{
+  GparityFlavourVector v;
+  GparityFlavourMatrix m, &testg = testAlgebra[a];
+  GparityFlavour      g(a);
+  
+  random(rng, v);
+  random(rng, m);
+  
+  std::cout << GridLogMessage << "Checking " << GparityFlavour::name[a] << ": ";
+  std::cout << "vecmul ";
+  test(g*v, testg*v);
+  std::cout << "matlmul ";
+  test(g*m, testg*m);
+  std::cout << "matrmul ";
+  test(m*g, m*testg);
+  std::cout << std::endl;
+}
+
+int main(int argc, char *argv[])
+{
+  Grid_init(&argc,&argv);
+  
+  Coordinate latt_size   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+  
+  GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+  GridSerialRNG sRNG;
+  
+  sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
+  
+  std::cout << GridLogMessage << "======== Test algebra" << std::endl;
+  createTestAlgebra();
+  std::cout << GridLogMessage << "======== Multiplication operators check" << std::endl;
+  for (int i = 0; i < GparityFlavour::nSigma; ++i)
+  {
+    checkSigma(i, sRNG);
+  }
+  std::cout << GridLogMessage << std::endl;
+  
+  Grid_finalize();
+  
+  return EXIT_SUCCESS;
+}
diff --git a/tests/core/Test_precision_change.cc b/tests/core/Test_precision_change.cc
new file mode 100644
index 00000000..66819f31
--- /dev/null
+++ b/tests/core/Test_precision_change.cc
@@ -0,0 +1,114 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/core/Test_precision_change.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+
+int main (int argc, char ** argv){
+  Grid_init(&argc, &argv);
+  int Ls = 16;
+  std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " and Ls=" << Ls << std::endl;
+  GridCartesian* UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridCartesian* FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_d);
+  GridRedBlackCartesian* FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_d);
+
+  GridCartesian* UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridCartesian* FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_f);
+  GridRedBlackCartesian* FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_f);
+
+
+  std::vector<int> seeds4({1, 2, 3, 4});
+  std::vector<int> seeds5({5, 6, 7, 8});
+  GridParallelRNG RNG5(FGrid_d);
+  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG RNG4(UGrid_d);
+  RNG4.SeedFixedIntegers(seeds4);
+
+  //Gauge fields
+  LatticeGaugeFieldD Umu_d(UGrid_d);
+  LatticeGaugeFieldF Umu_f(UGrid_f);
+  LatticeGaugeFieldD Umu_d_r(UGrid_d);
+  LatticeGaugeFieldD Utmp_d(UGrid_d);
+
+  for(int i=0;i<5;i++){
+    random(RNG4, Umu_d);
+
+    precisionChange(Umu_f, Umu_d);
+    std::cout << GridLogMessage << "Norm of double-prec and single-prec gauge fields (should be ~equal): " << norm2(Umu_d) << " " << norm2(Umu_f) << std::endl;
+    precisionChange(Umu_d_r, Umu_f);
+    RealD normdiff = axpy_norm(Utmp_d, -1.0, Umu_d_r, Umu_d);
+    std::cout << GridLogMessage << "Norm of difference of back-converted double-prec gauge fields (should be ~0) = " << normdiff << std::endl;
+  }
+
+  //Fermion fields
+  LatticeFermionD psi_d(FGrid_d);
+  LatticeFermionF psi_f(FGrid_f);
+  LatticeFermionD psi_d_r(FGrid_d);
+  LatticeFermionD psi_tmp_d(FGrid_d);
+
+  for(int i=0;i<5;i++){
+    random(RNG5, psi_d);
+
+    precisionChange(psi_f, psi_d);
+    std::cout << GridLogMessage << "Norm of double-prec and single-prec fermion fields (should be ~equal): " << norm2(psi_d) << " " << norm2(psi_f) << std::endl;
+    precisionChange(psi_d_r, psi_f);
+    RealD normdiff = axpy_norm(psi_tmp_d, -1.0, psi_d_r, psi_d);
+    std::cout << GridLogMessage << "Norm of difference of back-converted double-prec fermion fields (should be ~0)= " << normdiff << std::endl;
+  }
+
+  //Checkerboarded fermion fields
+  LatticeFermionD psi_cb_d(FrbGrid_d);
+  LatticeFermionF psi_cb_f(FrbGrid_f);
+  LatticeFermionD psi_cb_d_r(FrbGrid_d);
+  LatticeFermionD psi_cb_tmp_d(FrbGrid_d);
+
+  for(int i=0;i<5;i++){
+    random(RNG5, psi_d);
+    pickCheckerboard(Odd, psi_cb_d, psi_d);
+     
+    precisionChange(psi_cb_f, psi_cb_d);
+    std::cout << GridLogMessage << "Norm of odd-cb double-prec and single-prec fermion fields (should be ~equal): " << norm2(psi_cb_d) << " " << norm2(psi_cb_f) << std::endl;
+    precisionChange(psi_cb_d_r, psi_cb_f);
+    RealD normdiff = axpy_norm(psi_cb_tmp_d, -1.0, psi_cb_d_r, psi_cb_d);
+    std::cout << GridLogMessage << "Norm of difference of back-converted odd-cb double-prec fermion fields (should be ~0)= " << normdiff << std::endl;
+
+
+    pickCheckerboard(Even, psi_cb_d, psi_d);
+     
+    precisionChange(psi_cb_f, psi_cb_d);
+    std::cout << GridLogMessage << "Norm of even-cb double-prec and single-prec fermion fields (should be ~equal): " << norm2(psi_cb_d) << " " << norm2(psi_cb_f) << std::endl;
+    precisionChange(psi_cb_d_r, psi_cb_f);
+    normdiff = axpy_norm(psi_cb_tmp_d, -1.0, psi_cb_d_r, psi_cb_d);
+    std::cout << GridLogMessage << "Norm of difference of back-converted even-cb double-prec fermion fields (should be ~0)= " << normdiff << std::endl;
+  }
+
+
+
+  Grid_finalize();
+}
diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc
index 1fa1c6e4..9db2c563 100644
--- a/tests/forces/Test_dwf_gpforce.cc
+++ b/tests/forces/Test_dwf_gpforce.cc
@@ -71,26 +71,14 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   RealD mass=0.2; //kills the diagonal term
   RealD M5=1.8;
-  //  const int nu = 3;
-  //  std::vector<int> twists(Nd,0); // twists[nu] = 1;
-  //  GparityDomainWallFermionR::ImplParams params;  params.twists = twists;
-  //  GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
 
-  //  DomainWallFermionR Dw     (U,     Grid,RBGrid,mass,M5);
-
-  const int nu = 3;
+  const int nu = 0; //gparity direction
   std::vector<int> twists(Nd,0);
   twists[nu] = 1;
+  twists[Nd-1] = 1; //antiperiodic in time
   GparityDomainWallFermionR::ImplParams params;
   params.twists = twists;
-
-  /*
-  params.boundary_phases[0] = 1.0;
-  params.boundary_phases[1] = 1.0;
-  params.boundary_phases[2] = 1.0;
-  params.boundary_phases[3] =- 1.0;
-  */
-  
+ 
   GparityDomainWallFermionR Dw(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
 
   Dw.M   (phi,Mphi);
diff --git a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc
index d6744080..b1c0dd80 100644
--- a/tests/forces/Test_gpdwf_force.cc
+++ b/tests/forces/Test_gpdwf_force.cc
@@ -71,8 +71,10 @@ int main (int argc, char ** argv)
   RealD mass=0.01; 
   RealD M5=1.8; 
 
-  const int nu = 3;
-  std::vector<int> twists(Nd,0);  twists[nu] = 1;
+  const int nu = 1;
+  std::vector<int> twists(Nd,0);
+  twists[nu] = 1;
+  twists[3] = 1;
   GparityDomainWallFermionR::ImplParams params;  params.twists = twists;
   GparityDomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
   Ddwf.M   (phi,Mphi);
@@ -91,16 +93,28 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   // Modify the gauge field a little 
   ////////////////////////////////////
-  RealD dt = 0.0001;
+  RealD dt = 0.01;
 
+  LatticeColourMatrix zz(UGrid); zz=Zero();
   LatticeColourMatrix mommu(UGrid); 
   LatticeColourMatrix forcemu(UGrid); 
   LatticeGaugeField mom(UGrid); 
   LatticeGaugeField Uprime(UGrid); 
 
+  const int Lnu=latt_size[nu];
+  Lattice<iScalar<vInteger> > coor(UGrid);
+  LatticeCoordinate(coor,nu);
   for(int mu=0;mu<Nd;mu++){
 
-    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+    // Traceless antihermitian momentum; gaussian in lie alg
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
+    if(0){
+      if(mu==nu){
+	mommu=where(coor==Lnu-1,mommu,zz);
+      } else {
+	mommu=Zero();
+      }
+    }
 
     PokeIndex<LorentzIndex>(mom,mommu,mu);
 
@@ -125,6 +139,12 @@ int main (int argc, char ** argv)
 
   ComplexD Sprime    = innerProduct(MphiPrime   ,MphiPrime);
 
+
+  LatticeComplex lip(FGrid); lip=localInnerProduct(Mphi,Mphi);
+  LatticeComplex lipp(FGrid); lipp=localInnerProduct(MphiPrime,MphiPrime);
+  LatticeComplex dip(FGrid); dip = lipp - lip;
+  std::cout << " dip "<<dip<<std::endl;
+  
   //////////////////////////////////////////////
   // Use derivative to estimate dS
   //////////////////////////////////////////////
diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc
index d731f27a..b0b41676 100644
--- a/tests/forces/Test_gpwilson_force.cc
+++ b/tests/forces/Test_gpwilson_force.cc
@@ -64,8 +64,12 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   RealD mass=0.01; 
 
-  const int nu = 3;
-  std::vector<int> twists(Nd,0);  twists[nu] = 1;
+  const int nu = 1;
+  const int Lnu=latt_size[nu];
+
+  std::vector<int> twists(Nd,0);
+  twists[nu] = 1;
+  twists[3]=1;
   GparityWilsonFermionR::ImplParams params;  params.twists = twists;
   GparityWilsonFermionR Wil(U,*UGrid,*UrbGrid,mass,params);
   Wil.M   (phi,Mphi);
@@ -87,17 +91,28 @@ int main (int argc, char ** argv)
   RealD dt = 0.01;
 
   LatticeColourMatrix mommu(UGrid); 
+  LatticeColourMatrix zz(UGrid);
   LatticeColourMatrix forcemu(UGrid); 
   LatticeGaugeField mom(UGrid); 
   LatticeGaugeField Uprime(UGrid); 
 
+  
+  Lattice<iScalar<vInteger> > coor(UGrid);
+  LatticeCoordinate(coor,nu);
+  zz=Zero();
   for(int mu=0;mu<Nd;mu++){
 
     // Traceless antihermitian momentum; gaussian in lie alg
-    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); 
-
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
+    if(0){
+      if(mu==nu){
+	mommu=where(coor==Lnu-1,mommu,zz);
+      } else {
+	mommu=Zero();
+      }
+    }
     PokeIndex<LorentzIndex>(mom,mommu,mu);
-
+    
     // fourth order exponential approx
     autoView( mom_v, mom, CpuRead);
     autoView( U_v , U, CpuRead);
@@ -130,6 +145,10 @@ int main (int argc, char ** argv)
     mommu=Ta(mommu)*2.0;
     PokeIndex<LorentzIndex>(UdSdU,mommu,mu);
   }
+  LatticeComplex lip(UGrid); lip=localInnerProduct(Mphi,Mphi);
+  LatticeComplex lipp(UGrid); lipp=localInnerProduct(MphiPrime,MphiPrime);
+  LatticeComplex dip(UGrid); dip = lipp - lip;
+  std::cout << " dip "<<dip<<std::endl;
 
   LatticeComplex dS(UGrid); dS = Zero();
   for(int mu=0;mu<Nd;mu++){
@@ -139,12 +158,14 @@ int main (int argc, char ** argv)
     // Update PF action density
     dS = dS+trace(mommu*forcemu)*dt;
   }
-
+  std::cout << "mommu"<<mommu<<std::endl;
+  std::cout << "dS" << dS<<std::endl;
+  
   ComplexD dSpred    = sum(dS);
 
   std::cout << GridLogMessage << " S      "<<S<<std::endl;
   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl;
-  std::cout << GridLogMessage << "dS      "<<Sprime-S<<std::endl;
+  std::cout << GridLogMessage << "Delta S "<<Sprime-S<<std::endl;
   std::cout << GridLogMessage << "predict dS    "<< dSpred <<std::endl;
 
   assert( fabs(real(Sprime-S-dSpred)) < 2.0 ) ;
diff --git a/tests/hmc/Test_action_dwf_gparity2fvs1f.cc b/tests/hmc/Test_action_dwf_gparity2fvs1f.cc
new file mode 100644
index 00000000..830bcead
--- /dev/null
+++ b/tests/hmc/Test_action_dwf_gparity2fvs1f.cc
@@ -0,0 +1,257 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: tests/hmc/Test_action_dwf_gparity2fvs1f.cc
+
+    Copyright (C) 2015
+
+    Author: Christopher Kelly <ckelly@bnl.gov>
+    Author: paboyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+
+
+template<typename FermionField2f, typename FermionField1f>
+void copy2fTo1fFermionField(FermionField1f &out, const FermionField2f &in, int gpdir){
+  auto f0_halfgrid = PeekIndex<GparityFlavourIndex>(in,0); //on 2f Grid
+  FermionField1f f0_fullgrid_dbl(out.Grid());
+  Replicate(f0_halfgrid, f0_fullgrid_dbl); //double it up to live on the 1f Grid
+
+  auto f1_halfgrid = PeekIndex<GparityFlavourIndex>(in,1);
+  FermionField1f f1_fullgrid_dbl(out.Grid());
+  Replicate(f1_halfgrid, f1_fullgrid_dbl);
+  
+  const Coordinate &dim_2f = in.Grid()->GlobalDimensions();
+  const Coordinate &dim_1f = out.Grid()->GlobalDimensions();
+
+  //We have to be careful for 5d fields; the s-direction is placed before the x,y,z,t and so we need to shift gpdir by 1
+  std::cout << "gpdir " << gpdir << std::endl;
+
+  gpdir+=1;
+  std::cout << "gpdir for 5D fields " << gpdir << std::endl;
+
+  std::cout << "dim_2f " << dim_2f << std::endl;
+  std::cout << "dim_1f " << dim_1f << std::endl;
+  
+  assert(dim_1f[gpdir] == 2*dim_2f[gpdir]);
+
+  LatticeInteger xcoor_1f(out.Grid()); //5d lattice integer
+  LatticeCoordinate(xcoor_1f,gpdir);
+
+  int L = dim_2f[gpdir];
+
+  out = where(xcoor_1f < L, f0_fullgrid_dbl, f1_fullgrid_dbl);
+}
+
+//Both have the same field type
+void copy2fTo1fGaugeField(LatticeGaugeField &out, const LatticeGaugeField &in, int gpdir){
+  LatticeGaugeField U_dbl(out.Grid());
+  Replicate(in, U_dbl);
+  
+  LatticeGaugeField Uconj_dbl = conjugate( U_dbl );
+
+  const Coordinate &dim_2f = in.Grid()->GlobalDimensions();
+  
+  LatticeInteger xcoor_1f(out.Grid());
+  LatticeCoordinate(xcoor_1f,gpdir);
+
+  int L = dim_2f[gpdir];
+  
+  out = where(xcoor_1f < L, U_dbl, Uconj_dbl);
+}
+
+
+std::ostream & operator<<(std::ostream &os, const Coordinate &x){
+  os << "(";
+  for(int i=0;i<x.size();i++) os << x[i] <<  (i<x.size()-1 ? " " : "");
+  os << ")";
+  return os;
+}
+
+
+int main(int argc, char **argv) {
+  using namespace Grid;
+  
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  int Ls = 16;
+
+  Coordinate latt_2f = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd, vComplexD::Nsimd());
+  Coordinate mpi_layout = GridDefaultMpi();
+
+  int mu = 0; //Gparity direction
+
+  Coordinate latt_1f = latt_2f;
+  latt_1f[mu] *= 2;
+
+  GridCartesian         * UGrid_1f   = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout);
+  GridRedBlackCartesian * UrbGrid_1f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_1f);
+  GridCartesian         * FGrid_1f   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_1f);
+  GridRedBlackCartesian * FrbGrid_1f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_1f);
+
+
+  GridCartesian         * UGrid_2f   = SpaceTimeGrid::makeFourDimGrid(latt_2f, simd_layout, mpi_layout);
+  GridRedBlackCartesian * UrbGrid_2f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_2f);
+  GridCartesian         * FGrid_2f   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_2f);
+  GridRedBlackCartesian * FrbGrid_2f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_2f);
+
+
+  std::cout << "SIMD layout " << simd_layout << std::endl;
+  std::cout << "MPI layout " << mpi_layout << std::endl;
+  std::cout << "2f dimensions " << latt_2f << std::endl;
+  std::cout << "1f dimensions " << latt_1f << std::endl;
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5_2f(FGrid_2f);  RNG5_2f.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4_2f(UGrid_2f);  RNG4_2f.SeedFixedIntegers(seeds4);
+
+  std::cout << "Generating hot 2f gauge configuration" << std::endl;
+  LatticeGaugeField Umu_2f(UGrid_2f);
+  SU<Nc>::HotConfiguration(RNG4_2f,Umu_2f);
+
+  std::cout << "Copying 2f->1f gauge field" << std::endl;
+  LatticeGaugeField Umu_1f(UGrid_1f);
+  copy2fTo1fGaugeField(Umu_1f, Umu_2f, mu);  
+
+  typedef GparityWilsonImplR FermionImplPolicy2f;
+  typedef GparityDomainWallFermionR FermionAction2f;
+  typedef typename FermionAction2f::FermionField FermionField2f;
+  
+  typedef WilsonImplR FermionImplPolicy1f;
+  typedef DomainWallFermionR FermionAction1f;
+  typedef typename FermionAction1f::FermionField FermionField1f;
+
+  std::cout << "Generating eta 2f" << std::endl;
+  FermionField2f eta_2f(FGrid_2f);
+  gaussian(RNG5_2f, eta_2f);
+
+  RealD scale = std::sqrt(0.5);
+  eta_2f=eta_2f*scale;
+
+  std::cout << "Copying 2f->1f eta" << std::endl;
+  FermionField1f eta_1f(FGrid_1f);
+  copy2fTo1fFermionField(eta_1f, eta_2f, mu);
+  
+  Real beta         = 2.13;
+  Real light_mass   = 0.01;
+  Real strange_mass = 0.032;
+  Real pv_mass      = 1.0;
+  RealD M5  = 1.8;
+
+  //Setup the Dirac operators
+  std::cout << "Initializing Dirac operators" << std::endl;
+  
+  FermionAction2f::ImplParams Params_2f;
+  Params_2f.twists[mu] = 1;
+  Params_2f.twists[Nd-1] = 1; //APBC in time direction
+
+  //note 'Num' and 'Den' here refer to the determinant ratio, not the operator ratio in the pseudofermion action where the two are inverted
+  //to my mind the Pauli Villars and 'denominator' are synonymous but the Grid convention has this as the 'Numerator' operator in the RHMC implementation
+  FermionAction2f NumOp_2f(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f, *UrbGrid_2f, light_mass,M5,Params_2f); 
+  FermionAction2f DenOp_2f(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f, *UrbGrid_2f, pv_mass, M5,Params_2f);
+
+  FermionAction1f::ImplParams Params_1f;
+  Params_1f.boundary_phases[mu] = -1; //antiperiodic in doubled lattice in GP direction
+  Params_1f.boundary_phases[Nd-1] = -1;
+  
+  FermionAction1f NumOp_1f(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f, *UrbGrid_1f, light_mass,M5,Params_1f);
+  FermionAction1f DenOp_1f(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f, *UrbGrid_1f, pv_mass, M5,Params_1f);
+
+  //Test the replication routines by running a CG on eta
+  double StoppingCondition = 1e-10;
+  double MaxCGIterations = 30000;
+  ConjugateGradient<FermionField2f>  CG_2f(StoppingCondition,MaxCGIterations);
+  ConjugateGradient<FermionField1f>  CG_1f(StoppingCondition,MaxCGIterations);
+
+  NumOp_1f.ImportGauge(Umu_1f);
+  NumOp_2f.ImportGauge(Umu_2f);
+
+  FermionField1f test_1f(FGrid_1f);
+  FermionField2f test_2f(FGrid_2f);
+  
+  MdagMLinearOperator<FermionAction1f, FermionField1f> Linop_1f(NumOp_1f);
+  MdagMLinearOperator<FermionAction2f, FermionField2f> Linop_2f(NumOp_2f);
+  
+  CG_1f(Linop_1f, eta_1f, test_1f);
+  CG_2f(Linop_2f, eta_2f, test_2f);
+  RealD test_1f_norm = norm2(test_1f);
+  RealD test_2f_norm = norm2(test_2f);
+
+  std::cout << "Verification of replication routines: " << test_1f_norm << " " << test_2f_norm << " " << test_1f_norm - test_2f_norm << std::endl;
+
+
+#if 1
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy2f> Action2f;
+  typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy1f> Action1f;
+
+  RationalActionParams rational_params;
+  rational_params.inv_pow = 2;
+  rational_params.lo = 1e-5;
+  rational_params.hi = 32;
+  rational_params.md_degree = 16;
+  rational_params.action_degree = 16;
+
+  Action2f action_2f(DenOp_2f, NumOp_2f, rational_params);
+  Action1f action_1f(DenOp_1f, NumOp_1f, rational_params);
+#else
+  typedef TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy2f> Action2f;
+  typedef TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy1f> Action1f;
+
+  Action2f action_2f(DenOp_2f, NumOp_2f, CG_2f, CG_2f);
+  Action1f action_1f(DenOp_1f, NumOp_1f, CG_1f, CG_1f);
+#endif
+
+
+  std::cout << "Action refresh" << std::endl;
+  action_2f.refresh(Umu_2f, eta_2f);
+  action_1f.refresh(Umu_1f, eta_1f);
+
+  std::cout << "Action compute post heatbath" << std::endl;
+  RealD S_2f = action_2f.S(Umu_2f);
+  RealD S_1f = action_1f.S(Umu_1f);
+
+  std::cout << "Action comparison post heatbath" << std::endl;
+  std::cout << S_2f << " " << S_1f << " " << S_2f-S_1f << std::endl;
+
+  //Change the gauge field between refresh and action eval else the matrix and inverse matrices all cancel and we just get |eta|^2
+  SU<Nc>::HotConfiguration(RNG4_2f,Umu_2f);
+  copy2fTo1fGaugeField(Umu_1f, Umu_2f, mu);  
+
+  //Now compute the action with the new gauge field
+  std::cout << "Action compute post gauge field update" << std::endl;
+  S_2f = action_2f.S(Umu_2f);
+  S_1f = action_1f.S(Umu_1f);
+
+  std::cout << "Action comparison post gauge field update" << std::endl;
+  std::cout << S_2f << " " << S_1f << " " << S_2f-S_1f << std::endl;
+
+  Grid_finalize();
+} // main
+
+
diff --git a/tests/hmc/Test_hmc_GparityIwasakiGauge.cc b/tests/hmc/Test_hmc_GparityIwasakiGauge.cc
index 7f74d5d8..d4bfa0a5 100644
--- a/tests/hmc/Test_hmc_GparityIwasakiGauge.cc
+++ b/tests/hmc/Test_hmc_GparityIwasakiGauge.cc
@@ -58,7 +58,7 @@ int main(int argc, char **argv) {
   CheckpointerParameters CPparams;  
   CPparams.config_prefix = "ckpoint_EODWF_lat";
   CPparams.rng_prefix = "ckpoint_EODWF_rng";
-  CPparams.saveInterval = 5;
+  CPparams.saveInterval = 1;
   CPparams.format = "IEEE64BIG";
   
   TheHMC.Resources.LoadNerscCheckpointer(CPparams);
@@ -79,7 +79,7 @@ int main(int argc, char **argv) {
   // that have a complex construction
   // standard
   RealD beta = 2.6 ;
-  const int nu = 3;
+  const int nu = 1;
   std::vector<int> twists(Nd,0);
   twists[nu] = 1;
   ConjugateGimplD::setDirections(twists);
diff --git a/tests/hmc/Test_rhmc_EOWilsonRatioPowQuarter.cc b/tests/hmc/Test_rhmc_EOWilsonRatioPowQuarter.cc
new file mode 100644
index 00000000..3618e422
--- /dev/null
+++ b/tests/hmc/Test_rhmc_EOWilsonRatioPowQuarter.cc
@@ -0,0 +1,139 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_rhmc_EOWilsonRatio.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+//This test is for the Wilson action with the determinant det( M^dag M)^1/4
+//testing the generic RHMC 
+
+int main(int argc, char **argv) {
+  using namespace Grid;
+   ;
+
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  // here make a routine to print all the relevant information on the run
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+   // Typedefs to simplify notation
+  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  // Uses the default minimum norm
+  typedef WilsonImplR FermionImplPolicy;
+  typedef WilsonFermionR FermionAction;
+  typedef typename FermionAction::FermionField FermionField;
+
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  HMCWrapper TheHMC;
+
+  // Grid from the command line
+  TheHMC.Resources.AddFourDimGrid("gauge");
+
+  // Checkpointer definition
+  CheckpointerParameters CPparams;  
+  CPparams.config_prefix = "ckpoint_lat";
+  CPparams.rng_prefix = "ckpoint_rng";
+  CPparams.saveInterval = 5;
+  CPparams.format = "IEEE64BIG";
+  
+  TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  // Construct observables
+  typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
+  TheHMC.Resources.AddObservable<PlaqObs>();
+  //////////////////////////////////////////////
+
+  /////////////////////////////////////////////////////////////
+  // Collect actions, here use more encapsulation
+  // need wrappers of the fermionic classes 
+  // that have a complex construction
+  // standard
+  RealD beta = 5.6 ;
+  WilsonGaugeActionR Waction(beta);
+    
+  auto GridPtr = TheHMC.Resources.GetCartesian();
+  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
+
+  // temporarily need a gauge field
+  LatticeGaugeField U(GridPtr);
+
+  Real mass = -0.77;
+  Real pv   = 0.0;
+
+  // Can we define an overloaded operator that does not need U and initialises
+  // it with zeroes?
+  FermionAction DenOp(U, *GridPtr, *GridRBPtr, mass);
+  FermionAction NumOp(U, *GridPtr, *GridRBPtr, pv);
+
+
+  // 1/2+1/2 flavour
+  // RationalActionParams(int _inv_pow = 2,
+  // 		       RealD _lo      = 0.0, 
+  // 		       RealD _hi      = 1.0, 
+  // 		       int _maxit     = 1000,
+  // 		       RealD tol      = 1.0e-8, 
+  // 		       int _degree    = 10,
+  // 		       int _precision = 64,
+  // 		       int _BoundsCheckFreq=20)
+
+
+  int inv_pow = 4;
+  RationalActionParams Params(inv_pow,1.0e-2,64.0,1000,1.0e-6,14,64,1);
+
+  GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> RHMC(NumOp,DenOp,Params);
+
+    // Collect actions
+  ActionLevel<HMCWrapper::Field> Level1(1);
+  Level1.push_back(&RHMC);
+
+  ActionLevel<HMCWrapper::Field> Level2(4);
+  Level2.push_back(&Waction);
+
+  TheHMC.TheAction.push_back(Level1);
+  TheHMC.TheAction.push_back(Level2);
+  /////////////////////////////////////////////////////////////
+
+  // HMC parameters are serialisable 
+  TheHMC.Parameters.MD.MDsteps = 20;
+  TheHMC.Parameters.MD.trajL   = 1.0;
+
+  TheHMC.ReadCommandLine(argc, argv); // these can be parameters from file
+  TheHMC.Run();
+
+  Grid_finalize();
+
+} // main
+
+
+
+
+
diff --git a/tests/hmc/Test_rhmc_EOWilsonRatio_doubleVsMixedPrec.cc b/tests/hmc/Test_rhmc_EOWilsonRatio_doubleVsMixedPrec.cc
new file mode 100644
index 00000000..53940b50
--- /dev/null
+++ b/tests/hmc/Test_rhmc_EOWilsonRatio_doubleVsMixedPrec.cc
@@ -0,0 +1,119 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_rhmc_EOWilsonRatio_doubleVsMixedPrec.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+//This test ensures the mixed precision RHMC gives the same result as the regular double precision
+int main(int argc, char **argv) {
+  using namespace Grid;
+
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  // Uses the default minimum norm
+
+  typedef WilsonImplD FermionImplPolicyD;
+  typedef WilsonFermionD FermionActionD;
+  typedef typename FermionActionD::FermionField FermionFieldD;
+
+  typedef WilsonImplF FermionImplPolicyF;
+  typedef WilsonFermionF FermionActionF;
+  typedef typename FermionActionF::FermionField FermionFieldF;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  HMCWrapper TheHMC;
+  TheHMC.Resources.AddFourDimGrid("gauge");
+
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  auto GridPtrD = TheHMC.Resources.GetCartesian();
+  auto GridRBPtrD = TheHMC.Resources.GetRBCartesian();
+
+  GridCartesian* GridPtrF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+
+  // temporarily need a gauge field
+  LatticeGaugeFieldD Ud(GridPtrD);
+  LatticeGaugeFieldF Uf(GridPtrF);
+
+  Real mass = -0.77;
+  Real pv   = 0.0;
+
+  FermionActionD DenOpD(Ud, *GridPtrD, *GridRBPtrD, mass);
+  FermionActionD NumOpD(Ud, *GridPtrD, *GridRBPtrD, pv);
+
+  FermionActionF DenOpF(Uf, *GridPtrF, *GridRBPtrF, mass);
+  FermionActionF NumOpF(Uf, *GridPtrF, *GridRBPtrF, pv);
+
+  TheHMC.Resources.AddRNGs();
+  PeriodicGimplR::HotConfiguration(TheHMC.Resources.GetParallelRNG(), Ud);
+
+  std::string seed_string = "the_seed";
+
+  //Setup the pseudofermion actions
+  RationalActionParams GenParams;
+  GenParams.inv_pow = 2;
+  GenParams.lo = 1e-2;
+  GenParams.hi = 64.0;
+  GenParams.MaxIter = 1000;
+  GenParams.action_tolerance = GenParams.md_tolerance = 1e-6;
+  GenParams.action_degree = GenParams.md_degree = 6;
+  GenParams.precision = 64;
+  GenParams.BoundsCheckFreq = 20;
+
+  GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> GenD(NumOpD,DenOpD,GenParams);
+  GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> GenFD(NumOpD, DenOpD, 
+													NumOpF, DenOpF, 
+													GenParams, 50);
+  TheHMC.Resources.GetParallelRNG().SeedUniqueString(seed_string);
+  GenD.refresh(Ud, TheHMC.Resources.GetSerialRNG(), TheHMC.Resources.GetParallelRNG());    
+  RealD Sd = GenD.S(Ud);
+  LatticeGaugeField derivD(Ud);
+  GenD.deriv(Ud,derivD);   
+
+  TheHMC.Resources.GetParallelRNG().SeedUniqueString(seed_string);
+  GenFD.refresh(Ud, TheHMC.Resources.GetSerialRNG(), TheHMC.Resources.GetParallelRNG());    
+  RealD Sfd = GenFD.S(Ud);
+  LatticeGaugeField derivFD(Ud);
+  GenFD.deriv(Ud,derivFD);   
+
+  //Compare
+  std::cout << "Action : " << Sd << " " << Sfd << " reldiff " << (Sd - Sfd)/Sd << std::endl;
+  
+  LatticeGaugeField diff(Ud);
+  axpy(diff, -1.0, derivD, derivFD);
+  std::cout << "Norm of difference in deriv " << sqrt(norm2(diff)) << std::endl;
+
+  Grid_finalize();
+  return 0;
+}
+
diff --git a/tests/hmc/Test_rhmc_EOWilsonRatio_genericVsOneFlavor.cc b/tests/hmc/Test_rhmc_EOWilsonRatio_genericVsOneFlavor.cc
new file mode 100644
index 00000000..6ee0e7c5
--- /dev/null
+++ b/tests/hmc/Test_rhmc_EOWilsonRatio_genericVsOneFlavor.cc
@@ -0,0 +1,122 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_rhmc_EOWilsonRatio_genericVsOneFlavor.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Christopher Kelly <ckelly@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+//This test ensures that the OneFlavourEvenOddRatioRationalPseudoFermionAction and GeneralEvenOddRatioRationalPseudoFermionAction action (with parameters set appropriately0
+//give the same results
+
+int main(int argc, char **argv) {
+  using namespace Grid;
+
+  Grid_init(&argc, &argv);
+  int threads = GridThread::GetThreads();
+  std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
+
+  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;  // Uses the default minimum norm
+  typedef WilsonImplR FermionImplPolicy;
+  typedef WilsonFermionR FermionAction;
+  typedef typename FermionAction::FermionField FermionField;
+
+  //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+  HMCWrapper TheHMC;
+  TheHMC.Resources.AddFourDimGrid("gauge");
+
+
+  // // Checkpointer definition
+  // CheckpointerParameters CPparams;  
+  // CPparams.config_prefix = "ckpoint_lat";
+  // CPparams.rng_prefix = "ckpoint_rng";
+  // CPparams.saveInterval = 5;
+  // CPparams.format = "IEEE64BIG";
+  
+  // TheHMC.Resources.LoadNerscCheckpointer(CPparams);
+
+  RNGModuleParameters RNGpar;
+  RNGpar.serial_seeds = "1 2 3 4 5";
+  RNGpar.parallel_seeds = "6 7 8 9 10";
+  TheHMC.Resources.SetRNGSeeds(RNGpar);
+
+  auto GridPtr = TheHMC.Resources.GetCartesian();
+  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
+
+  // temporarily need a gauge field
+  LatticeGaugeField U(GridPtr);
+
+  Real mass = -0.77;
+  Real pv   = 0.0;
+
+  FermionAction DenOp(U, *GridPtr, *GridRBPtr, mass);
+  FermionAction NumOp(U, *GridPtr, *GridRBPtr, pv);
+
+  TheHMC.Resources.AddRNGs();
+  PeriodicGimplR::HotConfiguration(TheHMC.Resources.GetParallelRNG(), U);
+
+  std::string seed_string = "the_seed";
+
+  //1f action
+  OneFlavourRationalParams OneFParams(1.0e-2,64.0,1000,1.0e-6,6); 
+
+  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> OneF(NumOp,DenOp,OneFParams);
+  TheHMC.Resources.GetParallelRNG().SeedUniqueString(seed_string);
+  OneF.refresh(U, TheHMC.Resources.GetParallelRNG());    
+  RealD OneFS = OneF.S(U);
+  LatticeGaugeField OneFderiv(U);
+  OneF.deriv(U,OneFderiv);    
+  
+  //general action
+  RationalActionParams GenParams;
+  GenParams.inv_pow = 2;
+  GenParams.lo = OneFParams.lo;
+  GenParams.hi = OneFParams.hi;
+  GenParams.MaxIter = OneFParams.MaxIter;
+  GenParams.action_tolerance = GenParams.md_tolerance = OneFParams.tolerance;
+  GenParams.action_degree = GenParams.md_degree = OneFParams.degree;
+  GenParams.precision = OneFParams.precision;
+  GenParams.BoundsCheckFreq = OneFParams.BoundsCheckFreq;
+
+  GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> Gen(NumOp,DenOp,GenParams);
+  TheHMC.Resources.GetParallelRNG().SeedUniqueString(seed_string);
+  Gen.refresh(U, TheHMC.Resources.GetParallelRNG());    
+  RealD GenS = Gen.S(U);
+  LatticeGaugeField Genderiv(U);
+  Gen.deriv(U,Genderiv);   
+
+
+  //Compare
+  std::cout << "Action : " << OneFS << " " << GenS << " reldiff " << (OneFS - GenS)/OneFS << std::endl;
+  
+  LatticeGaugeField diff(U);
+  axpy(diff, -1.0, Genderiv, OneFderiv);
+  std::cout << "Norm of difference in deriv " << sqrt(norm2(diff)) << std::endl;
+
+  Grid_finalize();
+  return 0;
+}
+
diff --git a/tests/solver/Test_dwf_multishift_mixedprec.cc b/tests/solver/Test_dwf_multishift_mixedprec.cc
new file mode 100644
index 00000000..bdede459
--- /dev/null
+++ b/tests/solver/Test_dwf_multishift_mixedprec.cc
@@ -0,0 +1,184 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/Test_dwf_multishift_mixedprec.cc
+
+    Copyright (C) 2015
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+template<typename SpeciesD, typename SpeciesF, typename GaugeStatisticsType>
+void run_test(int argc, char ** argv, const typename SpeciesD::ImplParams &params){
+  const int Ls = 16;
+  GridCartesian* UGrid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* UrbGrid_d = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_d);
+  GridCartesian* FGrid_d = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_d);
+  GridRedBlackCartesian* FrbGrid_d = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_d);
+
+  GridCartesian* UGrid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
+  GridRedBlackCartesian* UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f);
+  GridCartesian* FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid_f);
+  GridRedBlackCartesian* FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid_f);
+
+  typedef typename SpeciesD::FermionField FermionFieldD;
+  typedef typename SpeciesF::FermionField FermionFieldF;
+  
+  std::vector<int> seeds4({1, 2, 3, 4});
+  std::vector<int> seeds5({5, 6, 7, 8});
+  GridParallelRNG RNG5(FGrid_d);
+  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG RNG4(UGrid_d);
+  RNG4.SeedFixedIntegers(seeds4);
+
+  FermionFieldD src_d(FGrid_d);
+  random(RNG5, src_d);
+
+  LatticeGaugeFieldD Umu_d(UGrid_d);
+
+  //CPS-created G-parity ensembles have a factor of 2 error in the plaquette that causes the read to fail unless we workaround it
+  bool gparity_plaquette_fix = false;
+  for(int i=1;i<argc;i++){
+    if(std::string(argv[i]) == "--gparity_plaquette_fix"){
+      gparity_plaquette_fix=true;
+      break;
+    }
+  }
+
+  bool cfg_loaded=false;
+  for(int i=1;i<argc;i++){
+    if(std::string(argv[i]) == "--load_config"){
+      assert(i != argc-1);
+      std::string file = argv[i+1];
+      NerscIO io;
+      FieldMetaData metadata;
+
+      if(gparity_plaquette_fix) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+      io.readConfiguration<GaugeStatisticsType>(Umu_d, metadata, file);
+
+      if(gparity_plaquette_fix){
+	metadata.plaquette *= 2.; //correct header value
+
+	//Get the true plaquette
+	FieldMetaData tmp;
+	GaugeStatisticsType gs; gs(Umu_d, tmp);
+	
+	std::cout << "After correction: plaqs " << tmp.plaquette << " " << metadata.plaquette << std::endl;
+	assert(fabs(tmp.plaquette -metadata.plaquette ) < 1.0e-5 );
+      }
+
+      cfg_loaded=true;
+      break;
+    }
+  }
+
+  if(!cfg_loaded)
+    SU<Nc>::HotConfiguration(RNG4, Umu_d);
+
+  LatticeGaugeFieldF Umu_f(UGrid_f);
+  precisionChange(Umu_f, Umu_d);
+
+  std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << "   Ls: " << Ls << std::endl;
+
+  RealD mass = 0.01;
+  RealD M5 = 1.8;
+  SpeciesD Ddwf_d(Umu_d, *FGrid_d, *FrbGrid_d, *UGrid_d, *UrbGrid_d, mass, M5, params);
+  SpeciesF Ddwf_f(Umu_f, *FGrid_f, *FrbGrid_f, *UGrid_f, *UrbGrid_f, mass, M5, params);
+
+  FermionFieldD src_o_d(FrbGrid_d);
+  pickCheckerboard(Odd, src_o_d, src_d);
+
+  SchurDiagMooeeOperator<SpeciesD, FermionFieldD> HermOpEO_d(Ddwf_d);
+  SchurDiagMooeeOperator<SpeciesF, FermionFieldF> HermOpEO_f(Ddwf_f);
+
+  AlgRemez remez(1e-4, 64, 50);
+  int order = 15;
+  remez.generateApprox(order, 1, 2); //sqrt
+
+  MultiShiftFunction shifts(remez, 1e-10, false);
+
+  int relup_freq = 50;
+  double t1=usecond();
+  ConjugateGradientMultiShiftMixedPrec<FermionFieldD,FermionFieldF> mcg(10000, shifts, FrbGrid_f, HermOpEO_f, relup_freq);
+
+  std::vector<FermionFieldD> results_o_d(order, FrbGrid_d);
+  mcg(HermOpEO_d, src_o_d, results_o_d);
+  double t2=usecond();
+
+  //Crosscheck double and mixed prec results
+  ConjugateGradientMultiShift<FermionFieldD> dmcg(10000, shifts);
+  std::vector<FermionFieldD> results_o_d_2(order, FrbGrid_d);
+  dmcg(HermOpEO_d, src_o_d, results_o_d_2);
+  double t3=usecond();
+
+  std::cout << GridLogMessage << "Comparison of mixed prec results to double prec results |mixed - double|^2 :" << std::endl;
+  FermionFieldD tmp(FrbGrid_d);
+  for(int i=0;i<order;i++){
+    RealD ndiff = axpy_norm(tmp, -1., results_o_d[i], results_o_d_2[i]);
+    std::cout << i << " " << ndiff << std::endl;
+  }
+
+  std::cout<<GridLogMessage << "Mixed precision algorithm: Total usec    =   "<< (t2-t1)<<std::endl;
+  std::cout<<GridLogMessage << "Double precision algorithm: Total usec    =   "<< (t3-t2)<<std::endl;
+}
+
+
+
+
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc, &argv);
+
+  bool gparity = false;
+  int gpdir;
+
+  for(int i=1;i<argc;i++){
+    std::string arg(argv[i]);
+    if(arg == "--Gparity"){
+      assert(i!=argc-1);
+      gpdir = std::stoi(argv[i+1]);
+      assert(gpdir >= 0 && gpdir <= 2); //spatial!
+      gparity = true;
+    }
+  }
+  if(gparity){
+    std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl;
+    GparityWilsonImplParams params;
+    params.twists[gpdir] = 1;
+    
+    std::vector<int> conj_dirs(Nd,0);
+    conj_dirs[gpdir] = 1;
+    ConjugateGimplD::setDirections(conj_dirs);
+
+    run_test<GparityDomainWallFermionD, GparityDomainWallFermionF, ConjugateGaugeStatistics>(argc,argv,params);
+  }else{
+    std::cout << "Running test with periodic BCs" << std::endl;
+    WilsonImplParams params;
+    run_test<DomainWallFermionD, DomainWallFermionF, PeriodicGaugeStatistics>(argc,argv,params);
+  }
+
+  Grid_finalize();
+}