From e1124d9572fd582738d1440c83a470f915764210 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:50:15 +0100
Subject: [PATCH 01/13] Integrator verbosity updates

---
 Grid/qcd/hmc/integrators/Integrator.h         | 21 +++++++------------
 .../hmc/integrators/Integrator_algorithm.h    |  6 ++----
 2 files changed, 10 insertions(+), 17 deletions(-)
diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h
index 7d65ed60..940a35d3 100644
--- a/Grid/qcd/hmc/integrators/Integrator.h
+++ b/Grid/qcd/hmc/integrators/Integrator.h
@@ -54,7 +54,7 @@ public:
 
   template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 >
   IntegratorParameters(ReaderClass & Reader){
-    std::cout << "Reading integrator\n";
+    std::cout << GridLogMessage << "Reading integrator\n";
     read(Reader, "Integrator", *this);
   }
 
@@ -132,7 +132,7 @@ class Integrator {
       double end_full = usecond();
       double time_full  = (end_full - start_full) / 1e3;
       double time_force = (end_force - start_force) / 1e3;
-      std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl;
+      std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl;
     }
 
     // Force from the other representations
@@ -237,8 +237,7 @@ class Integrator {
       for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
         // get gauge field from the SmearingPolicy and
         // based on the boolean is_smeared in actionID
-        Field& Us =
-            Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
+        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
         as[level].actions.at(actionID)->refresh(Us, pRNG);
       }
 
@@ -251,13 +250,11 @@ class Integrator {
   // over the representations
   struct _S {
     template <class FieldType, class Repr>
-    void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
-                    int level, RealD& H) {
+    void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, int level, RealD& H) {
       
       for (int a = 0; a < repr_set.size(); ++a) {
         RealD Hterm = repr_set.at(a)->S(Rep.U);
-        std::cout << GridLogMessage << "S Level " << level << " term " << a
-                  << " H Hirep = " << Hterm << std::endl;
+        std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl;
         H += Hterm;
 
       }
@@ -269,7 +266,7 @@ class Integrator {
 
 
     RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
-    std::cout << " Momentum hamiltonian "<< -H<<std::endl;
+
     RealD Hterm;
 
     // Actions
@@ -279,8 +276,7 @@ class Integrator {
         // based on the boolean is_smeared in actionID
         Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
         Hterm = as[level].actions.at(actionID)->S(Us);
-        std::cout << GridLogMessage << "S Level " << level << " term "
-                  << actionID << " H = " << Hterm << std::endl;
+        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
         H += Hterm;
       }
       as[level].apply(S_hireps, Representations, level, H);
@@ -305,8 +301,7 @@ class Integrator {
     // Check the clocks all match on all levels
     for (int level = 0; level < as.size(); ++level) {
       assert(fabs(t_U - t_P[level]) < 1.0e-6);  // must be the same
-      std::cout << GridLogIntegrator << " times[" << level
-                << "]= " << t_P[level] << " " << t_U << std::endl;
+      std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl;
     }
 
     // and that we indeed got to the end of the trajectory
diff --git a/Grid/qcd/hmc/integrators/Integrator_algorithm.h b/Grid/qcd/hmc/integrators/Integrator_algorithm.h
index 13a37aeb..47574026 100644
--- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h
+++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h
@@ -231,8 +231,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
     Field Pfg(U._grid);
     Ufg = U;
     Pfg = zero;
-    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
-              << std::endl;
+    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep << std::endl;
     // prepare_fg; no prediction/result cache for now
     // could relax CG stopping conditions for the
     // derivatives in the small step since the force gets multiplied by
@@ -271,8 +270,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
         this->step(U, level + 1, first_step, 0);
       }
 
-      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps),
-                        (1.0 - 2.0 * lambda) * eps);
+      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps);
 
       if (level == fl) {  // lowest level
         this->update_U(U, 0.5 * eps);

From b0de7ab7db06748f470cde7acc7e08062af03243 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:50:45 +0100
Subject: [PATCH 02/13] Extra do nothing guesser

---
 Grid/algorithms/iterative/Deflation.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h
index ceba1c09..8df67b3c 100644
--- a/Grid/algorithms/iterative/Deflation.h
+++ b/Grid/algorithms/iterative/Deflation.h
@@ -35,7 +35,11 @@ class ZeroGuesser: public LinearFunction<Field> {
 public:
   virtual void operator()(const Field &src, Field &guess) { guess = zero; };
 };
-
+template<class Field>
+class DoNothingGuesser: public LinearFunction<Field> {
+public:
+  virtual void operator()(const Field &src, Field &guess) {  };
+};
 template<class Field>
 class SourceGuesser: public LinearFunction<Field> {
 public:

From b595f58e4cb13801545ec8ac35d1ec80adee82d5 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:51:23 +0100
Subject: [PATCH 03/13] Allow HMC to acces matrix

---
 Grid/algorithms/LinearOperator.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h
index b86863b8..a1be48f4 100644
--- a/Grid/algorithms/LinearOperator.h
+++ b/Grid/algorithms/LinearOperator.h
@@ -178,7 +178,7 @@ namespace Grid {
     //////////////////////////////////////////////////////////
 
     template<class Field>
-      class SchurOperatorBase :  public LinearOperatorBase<Field> {
+    class SchurOperatorBase :  public LinearOperatorBase<Field> {
     public:
       virtual  RealD Mpc      (const Field &in, Field &out) =0;
       virtual  RealD MpcDag   (const Field &in, Field &out) =0;
@@ -211,10 +211,9 @@ namespace Grid {
       }
     };
     template<class Matrix,class Field>
-      class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
-    protected:
-      Matrix &_Mat;
+    class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
     public:
+      Matrix &_Mat;
       SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
       virtual  RealD Mpc      (const Field &in, Field &out) {
       Field tmp(in._grid);

From 6505efcb57e078104a0cb275501833b548007e38 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:51:57 +0100
Subject: [PATCH 04/13] Set iteration count if guess is already good

---
 Grid/algorithms/iterative/ConjugateGradient.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h
index c8daa995..25d206f5 100644
--- a/Grid/algorithms/iterative/ConjugateGradient.h
+++ b/Grid/algorithms/iterative/ConjugateGradient.h
@@ -89,6 +89,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
 
     // Check if guess is really REALLY good :)
     if (cp <= rsq) {
+      std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
+      IterationsToComplete = 0;	
       return;
     }
 

From 5921b1d2b9ef3a44ebefaae3d1b82bf118882f0e Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:52:33 +0100
Subject: [PATCH 05/13] Layout/whitespace changes

---
 .../iterative/ConjugateGradientMixedPrec.h         | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
index c7332455..4c2cf12f 100644
--- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
+++ b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
@@ -30,8 +30,11 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
 
 namespace Grid {
 
+
   //Mixed precision restarted defect correction CG
-  template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
+  template<class FieldD,class FieldF, 
+    typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
+    typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
   class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
   public:                                                
     RealD   Tolerance;
@@ -50,7 +53,12 @@ namespace Grid {
     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
     LinearFunction<FieldF> *guesser;
     
-    MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
+    MixedPrecisionConjugateGradient(RealD tol, 
+				    Integer maxinnerit, 
+				    Integer maxouterit, 
+				    GridBase* _sp_grid, 
+				    LinearOperatorBase<FieldF> &_Linop_f, 
+				    LinearOperatorBase<FieldD> &_Linop_d) :
       Linop_f(_Linop_f), Linop_d(_Linop_d),
       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
       OuterLoopNormMult(100.), guesser(NULL){ };
@@ -149,6 +157,8 @@ namespace Grid {
     }
   };
 
+
+
 }
 
 #endif

From 262a73c964540686f2e72ccce8bed27df6da43b1 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:52:58 +0100
Subject: [PATCH 06/13] COmment improvement

---
 Grid/algorithms/iterative/SchurRedBlack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Grid/algorithms/iterative/SchurRedBlack.h b/Grid/algorithms/iterative/SchurRedBlack.h
index 6c1e1f1c..9f63e8c0 100644
--- a/Grid/algorithms/iterative/SchurRedBlack.h
+++ b/Grid/algorithms/iterative/SchurRedBlack.h
@@ -261,7 +261,7 @@ namespace Grid {
     }     
     
     /////////////////////////////////////////////////////////////
-    // Override in derived. Not virtual as template methods
+    // Override in derived. 
     /////////////////////////////////////////////////////////////
     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0;
     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0;

From 73d467699766e82b0efaff5dab392941108846cf Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:53:44 +0100
Subject: [PATCH 07/13] Action and Deriv solvers allowed to differ

---
 .../pseudofermion/ExactOneFlavourRatio.h      | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
index 54a2c594..feba94c8 100644
--- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
+++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
@@ -59,12 +59,23 @@ namespace QCD{
       AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
       AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
       SchurRedBlackDiagMooeeSolve<FermionField> Solver;
+      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolver;
       FermionField Phi; // the pseudofermion field for this trajectory
 
     public:
-      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
-        OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), 
-        Solver(S, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
+      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, 
+					      AbstractEOFAFermion<Impl>& _Rop,
+					      OperatorFunction<FermionField>& ActionCG, 
+					      OperatorFunction<FermionField>& DerivCG, 
+					      Params& p, 
+					      bool use_fc=false) : 
+        Lop(_Lop), 
+	Rop(_Rop), 
+        Solver(ActionCG, false, true), 
+        DerivativeSolver(DerivCG, false, true), 
+	Phi(_Lop.FermionGrid()), 
+	param(p), 
+        use_heatbath_forecasting(use_fc)
       {
         AlgRemez remez(param.lo, param.hi, param.precision);
 
@@ -245,7 +256,7 @@ namespace QCD{
         Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
         G5R5(CG_src, Omega_spProj_Phi);
         spProj_Phi = zero;
-        Solver(Lop, CG_src, spProj_Phi);
+        DerivativeSolver(Lop, CG_src, spProj_Phi);
         Lop.Dtilde(spProj_Phi, Chi);
         G5R5(g5_R5_Chi, Chi);
         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
@@ -257,7 +268,7 @@ namespace QCD{
         Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
         G5R5(CG_src, Omega_spProj_Phi);
         spProj_Phi = zero;
-        Solver(Rop, CG_src, spProj_Phi);
+        DerivativeSolver(Rop, CG_src, spProj_Phi);
         Rop.Dtilde(spProj_Phi, Chi);
         G5R5(g5_R5_Chi, Chi);
         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);

From 7894ea6263a04e1a5bebfdaf7b424b2c18aacc55 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:54:19 +0100
Subject: [PATCH 08/13] Now have mixed precision solves in the 2f sector

---
 HMC/Mobius2p1fEOFA.cc | 197 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 185 insertions(+), 12 deletions(-)

diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc
index a6e42ceb..8f093d44 100644
--- a/HMC/Mobius2p1fEOFA.cc
+++ b/HMC/Mobius2p1fEOFA.cc
@@ -30,6 +30,134 @@ directory
 /*  END LEGAL */
 #include <Grid/Grid.h>
 
+namespace Grid{ 
+  namespace QCD{
+
+  /*
+   * Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
+   *    -- Store the single prec action operator.
+   *    -- Clone the gauge field from the operator function argument.
+   *    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
+   */
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
+    LinearFunction<FieldF> *guesser;
+    
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
+						    Integer maxinnerit, 
+						    Integer maxouterit, 
+						    GridBase* _sp_grid4, 
+						    GridBase* _sp_grid5, 
+						    FermionOperatorF &_FermOpF,
+						    FermionOperatorD &_FermOpD,
+						    SchurOperatorF   &_LinOpF,
+						    SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      InnerTolerance(tol), 
+      MaxInnerIterations(maxinnerit), 
+      MaxOuterIterations(maxouterit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.), 
+      guesser(NULL)
+    { 
+      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
+    };
+
+      void useGuesser(LinearFunction<FieldF> &g){
+        guesser = &g;
+      }
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      
+      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpU " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
+
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Must snarf a single precision copy of the gauge field in Linop_d argument
+      ////////////////////////////////////////////////////////////////////////////////////
+      typedef typename FermionOperatorF::GaugeField GaugeFieldF;
+      typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
+      typedef typename FermionOperatorD::GaugeField GaugeFieldD;
+      typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
+
+      GridBase * GridPtrF = SinglePrecGrid4;
+      GridBase * GridPtrD = FermOpD.Umu._grid;
+      GaugeFieldF     U_f  (GridPtrF);
+      GaugeLinkFieldF Umu_f(GridPtrF);
+      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
+      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
+
+      GaugeLinkFieldD Umu_d(GridPtrD);
+      for(int mu=0;mu<Nd*2;mu++){ 
+	Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
+	precisionChange(Umu_f,Umu_d);
+	PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
+      }
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+      
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Could test to make sure that LinOpF and LinOpD agree to single prec?
+      ////////////////////////////////////////////////////////////////////////////////////
+      GridBase *Fgrid = psi._grid;
+      FieldD tmp2(Fgrid);
+      FieldD tmp1(Fgrid);
+      LinOpU.Op(src,tmp1);
+      LinOpD.Op(src,tmp2);
+      std::cout << " Double gauge field "<< norm2(FermOpD.Umu)<<std::endl;
+      std::cout << " Single gauge field "<< norm2(FermOpF.Umu)<<std::endl;
+      std::cout << " Test of operators "<<norm2(tmp1)<<std::endl;
+      std::cout << " Test of operators "<<norm2(tmp2)<<std::endl;
+      tmp1=tmp1-tmp2;
+      std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl;
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
+      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+}};
+
 int main(int argc, char **argv) {
   using namespace Grid;
   using namespace Grid::QCD;
@@ -42,7 +170,9 @@ int main(int argc, char **argv) {
    // Typedefs to simplify notation
   typedef WilsonImplR FermionImplPolicy;
   typedef MobiusFermionR FermionAction;
+  typedef MobiusFermionF FermionActionF;
   typedef typename FermionAction::FermionField FermionField;
+  typedef typename FermionActionF::FermionField FermionFieldF;
 
   typedef Grid::XmlReader       Serialiser;
   
@@ -54,12 +184,12 @@ int main(int argc, char **argv) {
   MD.name    = std::string("Force Gradient");
   //  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; 
   //  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 8;
+  MD.MDsteps = 6;
   MD.trajL   = 1.0;
   
   HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 70;
-  HMCparams.Trajectories     = 200;
+  HMCparams.StartTrajectory  = 530;
+  HMCparams.Trajectories     = 1000;
   HMCparams.NoMetropolisUntil=  0;
   //  "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
   //  HMCparams.StartingType     =std::string("ColdStart");
@@ -97,24 +227,35 @@ int main(int argc, char **argv) {
   RealD b   = 1.0; 
   RealD c   = 0.0;
 
-  std::vector<Real> hasenbusch({ 0.1, 0.3 });
+  std::vector<Real> hasenbusch({ 0.1, 0.3, 0.6 });
 
   auto GridPtr   = TheHMC.Resources.GetCartesian();
   auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
   auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
   auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
 
+  std::vector<int> latt  = GridDefaultLatt();
+  std::vector<int> mpi   = GridDefaultMpi();
+  std::vector<int> simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
+  std::vector<int> simdD = GridDefaultSimd(Nd,vComplexD::Nsimd());
+  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi);
+  auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
+
   IwasakiGaugeActionR GaugeAction(beta);
 
   // temporarily need a gauge field
   LatticeGaugeField U(GridPtr);
+  LatticeGaugeFieldF UF(GridPtrF);
 
   // These lines are unecessary if BC are all periodic
   std::vector<Complex> boundary = {1,1,1,-1};
   FermionAction::ImplParams Params(boundary);
+  FermionActionF::ImplParams ParamsF(boundary);
   
   double ActionStoppingCondition     = 1e-10;
-  double DerivativeStoppingCondition = 1e-7;
+  double DerivativeStoppingCondition = 1e-6;
   double MaxCGIterations = 30000;
   ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
   ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
@@ -123,17 +264,12 @@ int main(int argc, char **argv) {
   // Collect actions
   ////////////////////////////////////
   ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(4);
+  ActionLevel<HMCWrapper::Field> Level2(8);
 
   ////////////////////////////////////
   // Strange action
   ////////////////////////////////////
 
-  //  FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
-  //  FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params);
-  //  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
-  //  Level1.push_back(&StrangePseudoFermion);
-
   // DJM: setup for EOFA ratio (Mobius)
   OneFlavourRationalParams OFRp;
   OFRp.lo       = 0.1;
@@ -145,7 +281,8 @@ int main(int argc, char **argv) {
 
   MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
   MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> EOFA(Strange_Op_L, Strange_Op_R, ActionCG, OFRp, true);
+
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> EOFA(Strange_Op_L, Strange_Op_R, ActionCG, DerivativeCG, OFRp, true);
   Level1.push_back(&EOFA);
 
   ////////////////////////////////////
@@ -162,15 +299,51 @@ int main(int argc, char **argv) {
   }
   light_num.push_back(pv_mass);
 
+  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
+  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
+
   std::vector<FermionAction *> Numerators;
   std::vector<FermionAction *> Denominators;
+  std::vector<LinearOperatorD *> LinOpD;
+
+  std::vector<FermionActionF *> DenominatorsF;
+  std::vector<LinearOperatorF *> LinOpF; 
+
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,
+							  LinearOperatorD,LinearOperatorF> MxPCG;
+  std::vector<MxPCG *> MPCG;
+
   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
 
   for(int h=0;h<n_hasenbusch+1;h++){
+
     std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
+
     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
+
+#if 0
+    ////////////////////////////////////////////////////////////////////////////
+    // Standard CG for 2f force
+    ////////////////////////////////////////////////////////////////////////////
     Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
+#else 
+    ////////////////////////////////////////////////////////////////////////////
+    // Mixed precision CG for 2f force
+    ////////////////////////////////////////////////////////////////////////////
+    DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF));
+    LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
+    LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
+    MPCG.push_back(new MxPCG(DerivativeStoppingCondition,
+			     200,
+			     MaxCGIterations,
+			     GridPtrF,
+			     FrbGridF,
+			     *DenominatorsF[h],*Denominators[h],
+			     *LinOpF[h], *LinOpD[h]) );
+    Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],ActionCG));
+#endif
+
   }
 
   for(int h=0;h<n_hasenbusch+1;h++){

From 6fd4b0be91e90b9604f81440c727cbc263e3fb6b Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 23 Apr 2019 21:54:45 +0100
Subject: [PATCH 09/13] Evolving HMC status

---
 HMC/README | 41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/HMC/README b/HMC/README
index 7bea2782..bf97701d 100644
--- a/HMC/README
+++ b/HMC/README
@@ -1,14 +1,39 @@
-* Sign off 2+1f HMC with Hasenbush and strange RHMC
-
-- Wilson plaquette cross checked against CPS and literature GwilsonFnone
-- Timesteps matched
-
-- Use 16^3x32 
-
 ********************************************************************
-* From previous CPS runs:
+TODO: 
 ********************************************************************
 
+- Currently 10 traj per hour
+
+- EOFA use a different derivative solver from action solver
+- EOFA fix Davids hack to the SchurRedBlack guessing
+
+*** Reduce precision/tolerance  in EOFA with second CG param.                          (10% speed up)
+*** Force gradient - reduced precision solve for the gradient                          (4/3x speedup)
+*** Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
+    -- Store the single prec action operator.
+    -- Clone the gauge field from the operator function argument.
+    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
+
+*** Mixed precision CG into EOFA portion         
+*** Further reduce precision in forces to 10^-6 ?
+
+*** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3.
+
+- Use mixed precision CG in HMC                           
+- SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead.
+- Or make an OperatorFunction for mixed precision as a wrapper
+
+********************************************************************
+* Signed off 2+1f HMC with Hasenbush and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
+* Signed off 2+1f HMC with Hasenbush and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
+* Wilson plaquette cross checked against CPS and literature GwilsonFnone
+********************************************************************
+
+********************************************************************
+* RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs:
+********************************************************************
+
+****
 Strange (m=0.04)  has eigenspan 
 **** 
 16^3 done as 1+1+1 with separate PV's. 

From 94ebcf551c1402d7ebff22959f0ff40361f66205 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 24 Apr 2019 06:28:14 +0100
Subject: [PATCH 10/13] Iteratoin range fix

---
 Grid/algorithms/iterative/ConjugateGradient.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h
index 25d206f5..c1717e2a 100644
--- a/Grid/algorithms/iterative/ConjugateGradient.h
+++ b/Grid/algorithms/iterative/ConjugateGradient.h
@@ -106,7 +106,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
 
     SolverTimer.Start();
     int k;
-    for (k = 1; k <= MaxIterations*1000; k++) {
+    for (k = 1; k <= MaxIterations; k++) {
       c = cp;
 
       MatrixTimer.Start();
@@ -167,8 +167,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
         return;
       }
     }
-    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
-              << std::endl;
+    std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl;
 
     if (ErrorOnNoConverge) assert(0);
     IterationsToComplete = k;

From 6d0b9856978af90d19aec511e32065067a0ae341 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 24 Apr 2019 06:29:52 +0100
Subject: [PATCH 11/13] Verbose

---
 Grid/qcd/hmc/integrators/Integrator.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h
index 940a35d3..9d4376fc 100644
--- a/Grid/qcd/hmc/integrators/Integrator.h
+++ b/Grid/qcd/hmc/integrators/Integrator.h
@@ -264,6 +264,7 @@ class Integrator {
   // Calculate action
   RealD S(Field& U) {  // here also U not used
 
+    std::cout << GridLogIntegrator << "Integrator action\n";
 
     RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
 
@@ -275,6 +276,7 @@ class Integrator {
         // get gauge field from the SmearingPolicy and
         // based on the boolean is_smeared in actionID
         Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
+        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
         Hterm = as[level].actions.at(actionID)->S(Us);
         std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
         H += Hterm;

From 2095c12eac86057fb9159cfda3d562dc6ef0aecb Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 22 May 2019 09:54:21 +0100
Subject: [PATCH 12/13] Make detection of HPE 8600 automatic

---
 Grid/communicator/SharedMemory.h     |  2 ++
 Grid/communicator/SharedMemoryMPI.cc | 24 ++++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/Grid/communicator/SharedMemory.h b/Grid/communicator/SharedMemory.h
index 9f6b1a25..63cfc7a6 100644
--- a/Grid/communicator/SharedMemory.h
+++ b/Grid/communicator/SharedMemory.h
@@ -103,6 +103,8 @@ class GlobalSharedMemory {
   //////////////////////////////////////////////////////////////////////////////////////
   static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
   static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
   ///////////////////////////////////////////////////
   // Provide shared memory facilities off comm world
   ///////////////////////////////////////////////////
diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc
index d3b094ad..b2a6ab53 100644
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@@ -132,7 +132,22 @@ int Log2Size(int TwoToPower,int MAXLOG2)
 }
 void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 {
-#ifdef HYPERCUBE
+  //////////////////////////////////////////////////////////////////////////////
+  // Look and see if it looks like an HPE 8600 based on hostname conventions
+  //////////////////////////////////////////////////////////////////////////////
+  const int namelen = _POSIX_HOST_NAME_MAX;
+  char name[namelen];
+  int R;
+  int I;
+  int N;
+  gethostname(name,namelen);
+  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
+
+  if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm);
+  else         OptimalCommunicatorSharedMemory(processors,optimal_comm);
+}
+void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+{
   ////////////////////////////////////////////////////////////////
   // Assert power of two shm_size.
   ////////////////////////////////////////////////////////////////
@@ -253,7 +268,9 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
   /////////////////////////////////////////////////////////////////
   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
   assert(ierr==0);
-#else 
+}
+void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+{
   ////////////////////////////////////////////////////////////////
   // Assert power of two shm_size.
   ////////////////////////////////////////////////////////////////
@@ -306,7 +323,6 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
   /////////////////////////////////////////////////////////////////
   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
   assert(ierr==0);
-#endif
 }
 ////////////////////////////////////////////////////////////////////////////////////////////
 // SHMGET
@@ -337,7 +353,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
         int errsv = errno;
         printf("Errno %d\n",errsv);
         printf("key   %d\n",key);
-        printf("size  %lld\n",size);
+        printf("size  %ld\n",size);
         printf("flags %d\n",flags);
         perror("shmget");
         exit(1);

From 44b53c3ba275a5389110b52afcf43038474ca970 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 22 May 2019 09:56:26 +0100
Subject: [PATCH 13/13] F1 ensemble running with 96%~ acceptance etc..

---
 .../pseudofermion/ExactOneFlavourRatio.h      |  76 ++++++--
 .../pseudofermion/TwoFlavourEvenOddRatio.h    |  12 +-
 HMC/Mobius2p1fEOFA.cc                         | 162 +++++++++++++-----
 HMC/README                                    |  17 +-
 tests/IO/Test_serialisation.cc                |  13 ++
 5 files changed, 220 insertions(+), 60 deletions(-)

diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
index feba94c8..25285565 100644
--- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
+++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
@@ -58,21 +58,26 @@ namespace QCD{
       bool use_heatbath_forecasting;
       AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
       AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
-      SchurRedBlackDiagMooeeSolve<FermionField> Solver;
-      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolver;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverHB;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverL;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverR;
+      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL;
+      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR;
       FermionField Phi; // the pseudofermion field for this trajectory
 
     public:
       ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, 
 					      AbstractEOFAFermion<Impl>& _Rop,
-					      OperatorFunction<FermionField>& ActionCG, 
-					      OperatorFunction<FermionField>& DerivCG, 
+					      OperatorFunction<FermionField>& HeatbathCG, 
+					      OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR, 
+					      OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR, 
 					      Params& p, 
 					      bool use_fc=false) : 
         Lop(_Lop), 
 	Rop(_Rop), 
-        Solver(ActionCG, false, true), 
-        DerivativeSolver(DerivCG, false, true), 
+	SolverHB(HeatbathCG,false,true),
+	SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), 
+	DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), 
 	Phi(_Lop.FermionGrid()), 
 	param(p), 
         use_heatbath_forecasting(use_fc)
@@ -109,6 +114,9 @@ namespace QCD{
       // We generate a Gaussian noise vector \eta, and then compute
       //  \Phi = M_{\rm EOFA}^{-1/2} * \eta
       // using a rational approximation to the inverse square root
+      //
+      // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta
+      //
       virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
       {
         Lop.ImportGauge(U);
@@ -129,7 +137,6 @@ namespace QCD{
         RealD scale = std::sqrt(0.5);
         gaussian(pRNG,eta);
         eta = eta * scale;
-        printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
 
         // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
         RealD N(PowerNegHalf.norm);
@@ -150,11 +157,11 @@ namespace QCD{
           if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
             Lop.Mdag(CG_src, Forecast_src);
             CG_soln = Forecast(Lop, Forecast_src, prev_solns);
-            Solver(Lop, CG_src, CG_soln);
+            SolverHB(Lop, CG_src, CG_soln);
             prev_solns.push_back(CG_soln);
           } else {
             CG_soln = zero; // Just use zero as the initial guess
-            Solver(Lop, CG_src, CG_soln);
+            SolverHB(Lop, CG_src, CG_soln);
           }
           Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
           tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
@@ -177,11 +184,11 @@ namespace QCD{
           if(use_heatbath_forecasting){
             Rop.Mdag(CG_src, Forecast_src);
             CG_soln = Forecast(Rop, Forecast_src, prev_solns);
-            Solver(Rop, CG_src, CG_soln);
+            SolverHB(Rop, CG_src, CG_soln);
             prev_solns.push_back(CG_soln);
           } else {
             CG_soln = zero;
-            Solver(Rop, CG_src, CG_soln);
+            SolverHB(Rop, CG_src, CG_soln);
           }
           Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
           tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
@@ -193,8 +200,47 @@ namespace QCD{
         // Reset shift coefficients for energy and force evals
         Lop.RefreshShiftCoefficients(0.0);
         Rop.RefreshShiftCoefficients(-1.0);
+
+	// Bounds check
+	RealD EtaDagEta = norm2(eta);
+	//	RealD PhiDagMPhi= norm2(eta);
+
       };
 
+      void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi) 
+      {
+#if 0
+        Lop.ImportGauge(U);
+        Rop.ImportGauge(U);
+
+        FermionField spProj_Phi(Lop.FermionGrid());
+	FermionField mPhi(Lop.FermionGrid());
+        std::vector<FermionField> tmp(2, Lop.FermionGrid());
+	mPhi = phi;
+	
+        // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
+        spProj(Phi, spProj_Phi, -1, Lop.Ls);
+        Lop.Omega(spProj_Phi, tmp[0], -1, 0);
+        G5R5(tmp[1], tmp[0]);
+        tmp[0] = zero;
+        SolverL(Lop, tmp[1], tmp[0]);
+        Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
+        Lop.Omega(tmp[1], tmp[0], -1, 1);
+	mPhi = mPhi -  Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
+
+        // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
+        //               - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
+        spProj(Phi, spProj_Phi, 1, Rop.Ls);
+        Rop.Omega(spProj_Phi, tmp[0], 1, 0);
+        G5R5(tmp[1], tmp[0]);
+        tmp[0] = zero;
+        SolverR(Rop, tmp[1], tmp[0]);
+        Rop.Dtilde(tmp[0], tmp[1]);
+        Rop.Omega(tmp[1], tmp[0], 1, 1);
+        action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
+#endif
+      }
+
       // EOFA action: see Eqn. (10) of arXiv:1706.05843
       virtual RealD S(const GaugeField& U)
       {
@@ -212,7 +258,7 @@ namespace QCD{
         Lop.Omega(spProj_Phi, tmp[0], -1, 0);
         G5R5(tmp[1], tmp[0]);
         tmp[0] = zero;
-        Solver(Lop, tmp[1], tmp[0]);
+        SolverL(Lop, tmp[1], tmp[0]);
         Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
         Lop.Omega(tmp[1], tmp[0], -1, 1);
         action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
@@ -223,7 +269,7 @@ namespace QCD{
         Rop.Omega(spProj_Phi, tmp[0], 1, 0);
         G5R5(tmp[1], tmp[0]);
         tmp[0] = zero;
-        Solver(Rop, tmp[1], tmp[0]);
+        SolverR(Rop, tmp[1], tmp[0]);
         Rop.Dtilde(tmp[0], tmp[1]);
         Rop.Omega(tmp[1], tmp[0], 1, 1);
         action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
@@ -256,7 +302,7 @@ namespace QCD{
         Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
         G5R5(CG_src, Omega_spProj_Phi);
         spProj_Phi = zero;
-        DerivativeSolver(Lop, CG_src, spProj_Phi);
+        DerivativeSolverL(Lop, CG_src, spProj_Phi);
         Lop.Dtilde(spProj_Phi, Chi);
         G5R5(g5_R5_Chi, Chi);
         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
@@ -268,7 +314,7 @@ namespace QCD{
         Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
         G5R5(CG_src, Omega_spProj_Phi);
         spProj_Phi = zero;
-        DerivativeSolver(Rop, CG_src, spProj_Phi);
+        DerivativeSolverR(Rop, CG_src, spProj_Phi);
         Rop.Dtilde(spProj_Phi, Chi);
         G5R5(g5_R5_Chi, Chi);
         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
index 0f57fe9c..e9a8853a 100644
--- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
+++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
@@ -46,6 +46,7 @@ namespace Grid{
 
       OperatorFunction<FermionField> &DerivativeSolver;
       OperatorFunction<FermionField> &ActionSolver;
+      OperatorFunction<FermionField> &HeatbathSolver;
 
       FermionField PhiOdd;   // the pseudo fermion field for this trajectory
       FermionField PhiEven;  // the pseudo fermion field for this trajectory
@@ -54,11 +55,18 @@ namespace Grid{
       TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
                                                 FermionOperator<Impl>  &_DenOp, 
                                                 OperatorFunction<FermionField> & DS,
-                                                OperatorFunction<FermionField> & AS) :
+                                                OperatorFunction<FermionField> & AS ) : 
+      TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {};
+
+      TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
+                                                FermionOperator<Impl>  &_DenOp, 
+                                                OperatorFunction<FermionField> & DS,
+                                                OperatorFunction<FermionField> & AS, OperatorFunction<FermionField> & HS) :
       NumOp(_NumOp), 
       DenOp(_DenOp), 
       DerivativeSolver(DS), 
       ActionSolver(AS),
+      HeatbathSolver(HS),
       PhiEven(_NumOp.FermionRedBlackGrid()),
       PhiOdd(_NumOp.FermionRedBlackGrid()) 
         {
@@ -111,7 +119,7 @@ namespace Grid{
         // Odd det factors
         Mpc.MpcDag(etaOdd,PhiOdd);
         tmp=zero;
-        ActionSolver(Vpc,PhiOdd,tmp);
+        HeatbathSolver(Vpc,PhiOdd,tmp);
         Vpc.Mpc(tmp,PhiOdd);            
 
         // Even det factors
diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc
index 8f093d44..61b06829 100644
--- a/HMC/Mobius2p1fEOFA.cc
+++ b/HMC/Mobius2p1fEOFA.cc
@@ -30,6 +30,8 @@ directory
 /*  END LEGAL */
 #include <Grid/Grid.h>
 
+#define MIXED_PRECISION
+
 namespace Grid{ 
   namespace QCD{
 
@@ -63,9 +65,6 @@ namespace Grid{
     Integer TotalOuterIterations; //Number of restarts
     Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
 
-    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
-    LinearFunction<FieldF> *guesser;
-    
     MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
 						    Integer maxinnerit, 
 						    Integer maxouterit, 
@@ -85,27 +84,26 @@ namespace Grid{
       MaxOuterIterations(maxouterit), 
       SinglePrecGrid4(_sp_grid4),
       SinglePrecGrid5(_sp_grid5),
-      OuterLoopNormMult(100.), 
-      guesser(NULL)
+      OuterLoopNormMult(100.) 
     { 
+      /* Debugging instances of objects; references are stored
       std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
       std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
       std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
       std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
+      */
     };
 
-      void useGuesser(LinearFunction<FieldF> &g){
-        guesser = &g;
-      }
-
     void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
 
+      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
 
       SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
       
-      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
-      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpU " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
-
+      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
+      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
+      // Assumption made in code to extract gauge field
+      // We could avoid storing LinopD reference alltogether ?
       assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
 
       ////////////////////////////////////////////////////////////////////////////////////
@@ -120,9 +118,13 @@ namespace Grid{
       GridBase * GridPtrD = FermOpD.Umu._grid;
       GaugeFieldF     U_f  (GridPtrF);
       GaugeLinkFieldF Umu_f(GridPtrF);
-      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
-      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
+      //      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
+      //      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
 
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Moving this to a Clone method of fermion operator would allow to duplicate the 
+      // physics parameters and decrease gauge field copies
+      ////////////////////////////////////////////////////////////////////////////////////
       GaugeLinkFieldD Umu_d(GridPtrD);
       for(int mu=0;mu<Nd*2;mu++){ 
 	Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
@@ -131,11 +133,11 @@ namespace Grid{
       }
       pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
       pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
-      
 
       ////////////////////////////////////////////////////////////////////////////////////
       // Could test to make sure that LinOpF and LinOpD agree to single prec?
       ////////////////////////////////////////////////////////////////////////////////////
+      /*
       GridBase *Fgrid = psi._grid;
       FieldD tmp2(Fgrid);
       FieldD tmp1(Fgrid);
@@ -147,6 +149,7 @@ namespace Grid{
       std::cout << " Test of operators "<<norm2(tmp2)<<std::endl;
       tmp1=tmp1-tmp2;
       std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl;
+      */
 
       ////////////////////////////////////////////////////////////////////////////////////
       // Make a mixed precision conjugate gradient
@@ -171,6 +174,8 @@ int main(int argc, char **argv) {
   typedef WilsonImplR FermionImplPolicy;
   typedef MobiusFermionR FermionAction;
   typedef MobiusFermionF FermionActionF;
+  typedef MobiusEOFAFermionR FermionEOFAAction;
+  typedef MobiusEOFAFermionF FermionEOFAActionF;
   typedef typename FermionAction::FermionField FermionField;
   typedef typename FermionActionF::FermionField FermionFieldF;
 
@@ -188,7 +193,7 @@ int main(int argc, char **argv) {
   MD.trajL   = 1.0;
   
   HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 530;
+  HMCparams.StartTrajectory  = 590;
   HMCparams.Trajectories     = 1000;
   HMCparams.NoMetropolisUntil=  0;
   //  "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
@@ -257,8 +262,6 @@ int main(int argc, char **argv) {
   double ActionStoppingCondition     = 1e-10;
   double DerivativeStoppingCondition = 1e-6;
   double MaxCGIterations = 30000;
-  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
-  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
 
   ////////////////////////////////////
   // Collect actions
@@ -269,6 +272,13 @@ int main(int argc, char **argv) {
   ////////////////////////////////////
   // Strange action
   ////////////////////////////////////
+  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
+  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
+  typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
+  typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
+
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
 
   // DJM: setup for EOFA ratio (Mobius)
   OneFlavourRationalParams OFRp;
@@ -279,10 +289,67 @@ int main(int argc, char **argv) {
   OFRp.degree   = 14;
   OFRp.precision= 50;
 
-  MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
-  MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
+  
+  MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
+  MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
+  MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
+  MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
 
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> EOFA(Strange_Op_L, Strange_Op_R, ActionCG, DerivativeCG, OFRp, true);
+  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
+  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
+#ifdef MIXED_PRECISION
+  const int MX_inner = 1000;
+  // Mixed precision EOFA
+  LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
+  LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
+  LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF);
+  LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF);
+
+  MxPCG_EOFA ActionCGL(ActionStoppingCondition,
+		       MX_inner,
+		       MaxCGIterations,
+		       GridPtrF,
+		       FrbGridF,
+		       Strange_Op_LF,Strange_Op_L,
+		       Strange_LinOp_LF,Strange_LinOp_L);
+
+  MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition,
+			   MX_inner,
+			   MaxCGIterations,
+			   GridPtrF,
+			   FrbGridF,
+			   Strange_Op_LF,Strange_Op_L,
+			   Strange_LinOp_LF,Strange_LinOp_L);
+  
+  MxPCG_EOFA ActionCGR(ActionStoppingCondition,
+		       MX_inner,
+		       MaxCGIterations,
+		       GridPtrF,
+		       FrbGridF,
+		       Strange_Op_RF,Strange_Op_R,
+		       Strange_LinOp_RF,Strange_LinOp_R);
+  
+  MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition,
+			   MX_inner,
+			   MaxCGIterations,
+			   GridPtrF,
+			   FrbGridF,
+			   Strange_Op_RF,Strange_Op_R,
+			   Strange_LinOp_RF,Strange_LinOp_R);
+
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
+    EOFA(Strange_Op_L, Strange_Op_R, 
+	 ActionCG, 
+	 ActionCGL, ActionCGR,
+	 DerivativeCGL, DerivativeCGR,
+	 OFRp, true);
+#else
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
+    EOFA(Strange_Op_L, Strange_Op_R, 
+	 ActionCG, ActionCG,
+	 DerivativeCG, DerivativeCG,
+	 OFRp, true);
+#endif
   Level1.push_back(&EOFA);
 
   ////////////////////////////////////
@@ -299,21 +366,20 @@ int main(int argc, char **argv) {
   }
   light_num.push_back(pv_mass);
 
-  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
-  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
-
+  //////////////////////////////////////////////////////////////
+  // Forced to replicate the MxPCG and DenominatorsF etc.. because
+  // there is no convenient way to "Clone" physics params from double op
+  // into single op for any operator pair.
+  // Same issue prevents using MxPCG in the Heatbath step
+  //////////////////////////////////////////////////////////////
   std::vector<FermionAction *> Numerators;
   std::vector<FermionAction *> Denominators;
-  std::vector<LinearOperatorD *> LinOpD;
-
-  std::vector<FermionActionF *> DenominatorsF;
-  std::vector<LinearOperatorF *> LinOpF; 
-
-  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,
-							  LinearOperatorD,LinearOperatorF> MxPCG;
-  std::vector<MxPCG *> MPCG;
-
   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
+  std::vector<MxPCG *> ActionMPCG;
+  std::vector<MxPCG *> MPCG;
+  std::vector<FermionActionF *> DenominatorsF;
+  std::vector<LinearOperatorD *> LinOpD;
+  std::vector<LinearOperatorF *> LinOpF; 
 
   for(int h=0;h<n_hasenbusch+1;h++){
 
@@ -322,26 +388,38 @@ int main(int argc, char **argv) {
     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
 
-#if 0
-    ////////////////////////////////////////////////////////////////////////////
-    // Standard CG for 2f force
-    ////////////////////////////////////////////////////////////////////////////
-    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
-#else 
+#ifdef MIXED_PRECISION
     ////////////////////////////////////////////////////////////////////////////
     // Mixed precision CG for 2f force
     ////////////////////////////////////////////////////////////////////////////
+
     DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF));
-    LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
     LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
+    LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
+
     MPCG.push_back(new MxPCG(DerivativeStoppingCondition,
-			     200,
+			     MX_inner,
 			     MaxCGIterations,
 			     GridPtrF,
 			     FrbGridF,
 			     *DenominatorsF[h],*Denominators[h],
 			     *LinOpF[h], *LinOpD[h]) );
-    Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],ActionCG));
+
+    ActionMPCG.push_back(new MxPCG(ActionStoppingCondition,
+				   MX_inner,
+				   MaxCGIterations,
+				   GridPtrF,
+				   FrbGridF,
+				   *DenominatorsF[h],*Denominators[h],
+				   *LinOpF[h], *LinOpD[h]) );
+
+    // Heatbath not mixed yet. As inverts numerators not so important as raised mass.
+    Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG));
+#else
+    ////////////////////////////////////////////////////////////////////////////
+    // Standard CG for 2f force
+    ////////////////////////////////////////////////////////////////////////////
+    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
 #endif
 
   }
diff --git a/HMC/README b/HMC/README
index bf97701d..ad1af242 100644
--- a/HMC/README
+++ b/HMC/README
@@ -2,13 +2,28 @@
 TODO: 
 ********************************************************************
 
-- Currently 10 traj per hour
+i) Got mixed precision in 2f and EOFA force and action solves.
+   But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to
+   reduce the number of solver and action objects. Needed ideally for the EOFA heatbath.
+   15% perhaps
+   Combine with 2x trajectory length?
+
+ii) Rational on EOFA HB  -- relax order
+                         -- Test the approx as per David email
+
+Resume / roll.sh 
+
+----------------------------------------------------------------
+
+- 16^3 Currently 10 traj per hour
 
 - EOFA use a different derivative solver from action solver
 - EOFA fix Davids hack to the SchurRedBlack guessing
 
 *** Reduce precision/tolerance  in EOFA with second CG param.                          (10% speed up)
 *** Force gradient - reduced precision solve for the gradient                          (4/3x speedup)
+
+
 *** Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
     -- Store the single prec action operator.
     -- Clone the gauge field from the operator function argument.
diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc
index d1ec1309..522e0cb5 100644
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -370,5 +370,18 @@ int main(int argc,char **argv)
   tensorConvTest(rng, SpinMatrix);
   tensorConvTest(rng, SpinVector);
 
+  {
+    HMCparameters HMCparams;
+    HMCparams.StartingType     =std::string("CheckpointStart");
+    HMCparams.StartTrajectory  =7;
+    HMCparams.Trajectories     =1000;
+    HMCparams.NoMetropolisUntil=0;
+    HMCparams.MD.name          =std::string("Force Gradient");
+    HMCparams.MD.MDsteps       = 10;
+    HMCparams.MD.trajL         = 1.0;
+
+    XmlWriter HMCwr("HMCparameters.xml");
+    write(HMCwr,"HMCparameters",HMCparams);
+  }
   Grid_finalize();
 }