Merge remote-tracking branch 'upstream/develop' into feature/kl2QED

2025-11-18 12:29:31 +00:00 · 2019-05-23 12:41:54 +01:00
parent ae5ad986e2 918e673078
commit e675c6a48c
14 changed files with 466 additions and 70 deletions
--- a/Grid/algorithms/LinearOperator.h
+++ b/Grid/algorithms/LinearOperator.h
@@ -212,9 +212,8 @@ namespace Grid {
    };
    template<class Matrix,class Field>
    class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
    protected:
      Matrix &_Mat;
    public:
      Matrix &_Mat;
      SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
      virtual  RealD Mpc      (const Field &in, Field &out) {
      Field tmp(in._grid);
--- a/Grid/algorithms/iterative/ConjugateGradient.h
+++ b/Grid/algorithms/iterative/ConjugateGradient.h
@@ -89,6 +89,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
    // Check if guess is really REALLY good :)
    if (cp <= rsq) {
      std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
      IterationsToComplete = 0;	
      return;
    }
@@ -104,7 +106,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
    SolverTimer.Start();
    int k;
-    for (k = 1; k <= MaxIterations*1000; k++) {
+    for (k = 1; k <= MaxIterations; k++) {
      c = cp;
      MatrixTimer.Start();
@@ -165,8 +167,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
        return;
      }
    }
-    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
+    std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl;
              << std::endl;
    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;
--- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
+++ b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
@@ -30,8 +30,11 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
 namespace Grid {
  //Mixed precision restarted defect correction CG
-  template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
+  template<class FieldD,class FieldF, 
    typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
    typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
  class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
  public:                                                
    RealD   Tolerance;
@@ -50,7 +53,12 @@ namespace Grid {
    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
    LinearFunction<FieldF> *guesser;
-    MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
+    MixedPrecisionConjugateGradient(RealD tol, 
 				    Integer maxinnerit, 
 				    Integer maxouterit, 
 				    GridBase* _sp_grid, 
 				    LinearOperatorBase<FieldF> &_Linop_f, 
 				    LinearOperatorBase<FieldD> &_Linop_d) :
      Linop_f(_Linop_f), Linop_d(_Linop_d),
      Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
      OuterLoopNormMult(100.), guesser(NULL){ };
@@ -149,6 +157,8 @@ namespace Grid {
    }
  };
 }
 #endif
--- a/Grid/algorithms/iterative/Deflation.h
+++ b/Grid/algorithms/iterative/Deflation.h
@@ -35,7 +35,11 @@ class ZeroGuesser: public LinearFunction<Field> {
 public:
  virtual void operator()(const Field &src, Field &guess) { guess = zero; };
 };
-
+template<class Field>
 class DoNothingGuesser: public LinearFunction<Field> {
 public:
  virtual void operator()(const Field &src, Field &guess) {  };
 };
 template<class Field>
 class SourceGuesser: public LinearFunction<Field> {
 public:
--- a/Grid/algorithms/iterative/SchurRedBlack.h
+++ b/Grid/algorithms/iterative/SchurRedBlack.h
@@ -261,7 +261,7 @@ namespace Grid {
    }     
    /////////////////////////////////////////////////////////////
-    // Override in derived. Not virtual as template methods
+    // Override in derived. 
    /////////////////////////////////////////////////////////////
    virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0;
    virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0;
--- a/Grid/communicator/SharedMemory.h
+++ b/Grid/communicator/SharedMemory.h
@@ -103,6 +103,8 @@ class GlobalSharedMemory {
  //////////////////////////////////////////////////////////////////////////////////////
  static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
  static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
  static void OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
  static void OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
  ///////////////////////////////////////////////////
  // Provide shared memory facilities off comm world
  ///////////////////////////////////////////////////
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@@ -132,7 +132,22 @@ int Log2Size(int TwoToPower,int MAXLOG2)
 }
 void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 {
-#ifdef HYPERCUBE
+  //////////////////////////////////////////////////////////////////////////////
  // Look and see if it looks like an HPE 8600 based on hostname conventions
  //////////////////////////////////////////////////////////////////////////////
  const int namelen = _POSIX_HOST_NAME_MAX;
  char name[namelen];
  int R;
  int I;
  int N;
  gethostname(name,namelen);
  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
  if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm);
  else         OptimalCommunicatorSharedMemory(processors,optimal_comm);
 }
 void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 {
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
@@ -253,7 +268,9 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
  /////////////////////////////////////////////////////////////////
  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
  assert(ierr==0);
-#else 
+}
 void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 {
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
@@ -306,7 +323,6 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
  /////////////////////////////////////////////////////////////////
  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
  assert(ierr==0);
 #endif
 }
 ////////////////////////////////////////////////////////////////////////////////////////////
 // SHMGET
@@ -337,7 +353,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
        int errsv = errno;
        printf("Errno %d\n",errsv);
        printf("key   %d\n",key);
-        printf("size  %lld\n",size);
+        printf("size  %ld\n",size);
        printf("flags %d\n",flags);
        perror("shmget");
        exit(1);
--- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
+++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
@@ -58,13 +58,29 @@ namespace QCD{
      bool use_heatbath_forecasting;
      AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
      AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
-      SchurRedBlackDiagMooeeSolve<FermionField> Solver;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverHB;
      SchurRedBlackDiagMooeeSolve<FermionField> SolverL;
      SchurRedBlackDiagMooeeSolve<FermionField> SolverR;
      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL;
      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR;
      FermionField Phi; // the pseudofermion field for this trajectory
    public:
-      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
+      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, 
-        OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), 
+					      AbstractEOFAFermion<Impl>& _Rop,
-        Solver(S, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
+					      OperatorFunction<FermionField>& HeatbathCG, 
 					      OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR, 
 					      OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR, 
 					      Params& p, 
 					      bool use_fc=false) : 
        Lop(_Lop), 
 	Rop(_Rop), 
 	SolverHB(HeatbathCG,false,true),
 	SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), 
 	DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), 
 	Phi(_Lop.FermionGrid()), 
 	param(p), 
        use_heatbath_forecasting(use_fc)
      {
        AlgRemez remez(param.lo, param.hi, param.precision);
@@ -98,6 +114,9 @@ namespace QCD{
      // We generate a Gaussian noise vector \eta, and then compute
      //  \Phi = M_{\rm EOFA}^{-1/2} * \eta
      // using a rational approximation to the inverse square root
      //
      // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta
      //
      virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
      {
        Lop.ImportGauge(U);
@@ -118,7 +137,6 @@ namespace QCD{
        RealD scale = std::sqrt(0.5);
        gaussian(pRNG,eta);
        eta = eta * scale;
        printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
        // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
        RealD N(PowerNegHalf.norm);
@@ -139,11 +157,11 @@ namespace QCD{
          if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
            Lop.Mdag(CG_src, Forecast_src);
            CG_soln = Forecast(Lop, Forecast_src, prev_solns);
-            Solver(Lop, CG_src, CG_soln);
+            SolverHB(Lop, CG_src, CG_soln);
            prev_solns.push_back(CG_soln);
          } else {
            CG_soln = zero; // Just use zero as the initial guess
-            Solver(Lop, CG_src, CG_soln);
+            SolverHB(Lop, CG_src, CG_soln);
          }
          Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
          tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
@@ -166,11 +184,11 @@ namespace QCD{
          if(use_heatbath_forecasting){
            Rop.Mdag(CG_src, Forecast_src);
            CG_soln = Forecast(Rop, Forecast_src, prev_solns);
-            Solver(Rop, CG_src, CG_soln);
+            SolverHB(Rop, CG_src, CG_soln);
            prev_solns.push_back(CG_soln);
          } else {
            CG_soln = zero;
-            Solver(Rop, CG_src, CG_soln);
+            SolverHB(Rop, CG_src, CG_soln);
          }
          Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
          tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
@@ -182,8 +200,47 @@ namespace QCD{
        // Reset shift coefficients for energy and force evals
        Lop.RefreshShiftCoefficients(0.0);
        Rop.RefreshShiftCoefficients(-1.0);
 	// Bounds check
 	RealD EtaDagEta = norm2(eta);
 	//	RealD PhiDagMPhi= norm2(eta);
      };
      void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi) 
      {
 #if 0
        Lop.ImportGauge(U);
        Rop.ImportGauge(U);
        FermionField spProj_Phi(Lop.FermionGrid());
 	FermionField mPhi(Lop.FermionGrid());
        std::vector<FermionField> tmp(2, Lop.FermionGrid());
 	mPhi = phi;
        // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
        spProj(Phi, spProj_Phi, -1, Lop.Ls);
        Lop.Omega(spProj_Phi, tmp[0], -1, 0);
        G5R5(tmp[1], tmp[0]);
        tmp[0] = zero;
        SolverL(Lop, tmp[1], tmp[0]);
        Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
        Lop.Omega(tmp[1], tmp[0], -1, 1);
 	mPhi = mPhi -  Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
        // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
        //               - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
        spProj(Phi, spProj_Phi, 1, Rop.Ls);
        Rop.Omega(spProj_Phi, tmp[0], 1, 0);
        G5R5(tmp[1], tmp[0]);
        tmp[0] = zero;
        SolverR(Rop, tmp[1], tmp[0]);
        Rop.Dtilde(tmp[0], tmp[1]);
        Rop.Omega(tmp[1], tmp[0], 1, 1);
        action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
 #endif
      }
      // EOFA action: see Eqn. (10) of arXiv:1706.05843
      virtual RealD S(const GaugeField& U)
      {
@@ -201,7 +258,7 @@ namespace QCD{
        Lop.Omega(spProj_Phi, tmp[0], -1, 0);
        G5R5(tmp[1], tmp[0]);
        tmp[0] = zero;
-        Solver(Lop, tmp[1], tmp[0]);
+        SolverL(Lop, tmp[1], tmp[0]);
        Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
        Lop.Omega(tmp[1], tmp[0], -1, 1);
        action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
@@ -212,7 +269,7 @@ namespace QCD{
        Rop.Omega(spProj_Phi, tmp[0], 1, 0);
        G5R5(tmp[1], tmp[0]);
        tmp[0] = zero;
-        Solver(Rop, tmp[1], tmp[0]);
+        SolverR(Rop, tmp[1], tmp[0]);
        Rop.Dtilde(tmp[0], tmp[1]);
        Rop.Omega(tmp[1], tmp[0], 1, 1);
        action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
@@ -245,7 +302,7 @@ namespace QCD{
        Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
        G5R5(CG_src, Omega_spProj_Phi);
        spProj_Phi = zero;
-        Solver(Lop, CG_src, spProj_Phi);
+        DerivativeSolverL(Lop, CG_src, spProj_Phi);
        Lop.Dtilde(spProj_Phi, Chi);
        G5R5(g5_R5_Chi, Chi);
        Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
@@ -257,7 +314,7 @@ namespace QCD{
        Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
        G5R5(CG_src, Omega_spProj_Phi);
        spProj_Phi = zero;
-        Solver(Rop, CG_src, spProj_Phi);
+        DerivativeSolverR(Rop, CG_src, spProj_Phi);
        Rop.Dtilde(spProj_Phi, Chi);
        G5R5(g5_R5_Chi, Chi);
        Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
--- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
+++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
@@ -46,6 +46,7 @@ namespace Grid{
      OperatorFunction<FermionField> &DerivativeSolver;
      OperatorFunction<FermionField> &ActionSolver;
      OperatorFunction<FermionField> &HeatbathSolver;
      FermionField PhiOdd;   // the pseudo fermion field for this trajectory
      FermionField PhiEven;  // the pseudo fermion field for this trajectory
@@ -55,10 +56,17 @@ namespace Grid{
                                                FermionOperator<Impl>  &_DenOp, 
                                                OperatorFunction<FermionField> & DS,
                                                OperatorFunction<FermionField> & AS ) : 
      TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {};
      TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
                                                FermionOperator<Impl>  &_DenOp, 
                                                OperatorFunction<FermionField> & DS,
                                                OperatorFunction<FermionField> & AS, OperatorFunction<FermionField> & HS) :
      NumOp(_NumOp), 
      DenOp(_DenOp), 
      DerivativeSolver(DS), 
      ActionSolver(AS),
      HeatbathSolver(HS),
      PhiEven(_NumOp.FermionRedBlackGrid()),
      PhiOdd(_NumOp.FermionRedBlackGrid()) 
        {
@@ -111,7 +119,7 @@ namespace Grid{
        // Odd det factors
        Mpc.MpcDag(etaOdd,PhiOdd);
        tmp=zero;
-        ActionSolver(Vpc,PhiOdd,tmp);
+        HeatbathSolver(Vpc,PhiOdd,tmp);
        Vpc.Mpc(tmp,PhiOdd);            
        // Even det factors
--- a/Grid/qcd/hmc/integrators/Integrator.h
+++ b/Grid/qcd/hmc/integrators/Integrator.h
@@ -54,7 +54,7 @@ public:
  template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 >
  IntegratorParameters(ReaderClass & Reader){
-    std::cout << "Reading integrator\n";
+    std::cout << GridLogMessage << "Reading integrator\n";
    read(Reader, "Integrator", *this);
  }
@@ -132,7 +132,7 @@ class Integrator {
      double end_full = usecond();
      double time_full  = (end_full - start_full) / 1e3;
      double time_force = (end_force - start_force) / 1e3;
-      std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl;
+      std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl;
    }
    // Force from the other representations
@@ -237,8 +237,7 @@ class Integrator {
      for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
        // get gauge field from the SmearingPolicy and
        // based on the boolean is_smeared in actionID
-        Field& Us =
+        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
            Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
        as[level].actions.at(actionID)->refresh(Us, pRNG);
      }
@@ -251,13 +250,11 @@ class Integrator {
  // over the representations
  struct _S {
    template <class FieldType, class Repr>
-    void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
+    void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, int level, RealD& H) {
                    int level, RealD& H) {
      for (int a = 0; a < repr_set.size(); ++a) {
        RealD Hterm = repr_set.at(a)->S(Rep.U);
-        std::cout << GridLogMessage << "S Level " << level << " term " << a
+        std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl;
                  << " H Hirep = " << Hterm << std::endl;
        H += Hterm;
      }
@@ -267,9 +264,10 @@ class Integrator {
  // Calculate action
  RealD S(Field& U) {  // here also U not used
    std::cout << GridLogIntegrator << "Integrator action\n";
    RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
-    std::cout << " Momentum hamiltonian "<< -H<<std::endl;
+
    RealD Hterm;
    // Actions
@@ -278,9 +276,9 @@ class Integrator {
        // get gauge field from the SmearingPolicy and
        // based on the boolean is_smeared in actionID
        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
        Hterm = as[level].actions.at(actionID)->S(Us);
-        std::cout << GridLogMessage << "S Level " << level << " term "
+        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
                  << actionID << " H = " << Hterm << std::endl;
        H += Hterm;
      }
      as[level].apply(S_hireps, Representations, level, H);
@@ -305,8 +303,7 @@ class Integrator {
    // Check the clocks all match on all levels
    for (int level = 0; level < as.size(); ++level) {
      assert(fabs(t_U - t_P[level]) < 1.0e-6);  // must be the same
-      std::cout << GridLogIntegrator << " times[" << level
+      std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl;
                << "]= " << t_P[level] << " " << t_U << std::endl;
    }
    // and that we indeed got to the end of the trajectory
--- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h
+++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h
@@ -231,8 +231,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
    Field Pfg(U._grid);
    Ufg = U;
    Pfg = zero;
-    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
+    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep << std::endl;
              << std::endl;
    // prepare_fg; no prediction/result cache for now
    // could relax CG stopping conditions for the
    // derivatives in the small step since the force gets multiplied by
@@ -271,8 +270,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
        this->step(U, level + 1, first_step, 0);
      }
-      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps),
+      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps);
                        (1.0 - 2.0 * lambda) * eps);
      if (level == fl) {  // lowest level
        this->update_U(U, 0.5 * eps);
--- a/HMC/Mobius2p1fEOFA.cc
+++ b/HMC/Mobius2p1fEOFA.cc
@@ -30,6 +30,137 @@ directory
 /*  END LEGAL */
 #include <Grid/Grid.h>
 #define MIXED_PRECISION
 namespace Grid{ 
  namespace QCD{
  /*
   * Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
   *    -- Store the single prec action operator.
   *    -- Clone the gauge field from the operator function argument.
   *    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
   */
  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
  public:
    typedef typename FermionOperatorD::FermionField FieldD;
    typedef typename FermionOperatorF::FermionField FieldF;
    RealD   Tolerance;
    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
    Integer MaxInnerIterations;
    Integer MaxOuterIterations;
    GridBase* SinglePrecGrid4; //Grid for single-precision fields
    GridBase* SinglePrecGrid5; //Grid for single-precision fields
    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
    FermionOperatorF &FermOpF;
    FermionOperatorD &FermOpD;;
    SchurOperatorF &LinOpF;
    SchurOperatorD &LinOpD;
    Integer TotalInnerIterations; //Number of inner CG iterations
    Integer TotalOuterIterations; //Number of restarts
    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
 						    Integer maxinnerit, 
 						    Integer maxouterit, 
 						    GridBase* _sp_grid4, 
 						    GridBase* _sp_grid5, 
 						    FermionOperatorF &_FermOpF,
 						    FermionOperatorD &_FermOpD,
 						    SchurOperatorF   &_LinOpF,
 						    SchurOperatorD   &_LinOpD): 
      LinOpF(_LinOpF),
      LinOpD(_LinOpD),
      FermOpF(_FermOpF),
      FermOpD(_FermOpD),
      Tolerance(tol), 
      InnerTolerance(tol), 
      MaxInnerIterations(maxinnerit), 
      MaxOuterIterations(maxouterit), 
      SinglePrecGrid4(_sp_grid4),
      SinglePrecGrid5(_sp_grid5),
      OuterLoopNormMult(100.) 
    { 
      /* Debugging instances of objects; references are stored
      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
      */
    };
    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
      // Assumption made in code to extract gauge field
      // We could avoid storing LinopD reference alltogether ?
      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
      ////////////////////////////////////////////////////////////////////////////////////
      // Must snarf a single precision copy of the gauge field in Linop_d argument
      ////////////////////////////////////////////////////////////////////////////////////
      typedef typename FermionOperatorF::GaugeField GaugeFieldF;
      typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
      typedef typename FermionOperatorD::GaugeField GaugeFieldD;
      typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
      GridBase * GridPtrF = SinglePrecGrid4;
      GridBase * GridPtrD = FermOpD.Umu._grid;
      GaugeFieldF     U_f  (GridPtrF);
      GaugeLinkFieldF Umu_f(GridPtrF);
      //      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
      //      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
      ////////////////////////////////////////////////////////////////////////////////////
      // Moving this to a Clone method of fermion operator would allow to duplicate the 
      // physics parameters and decrease gauge field copies
      ////////////////////////////////////////////////////////////////////////////////////
      GaugeLinkFieldD Umu_d(GridPtrD);
      for(int mu=0;mu<Nd*2;mu++){ 
 	Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
 	precisionChange(Umu_f,Umu_d);
 	PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
      }
      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
      ////////////////////////////////////////////////////////////////////////////////////
      // Could test to make sure that LinOpF and LinOpD agree to single prec?
      ////////////////////////////////////////////////////////////////////////////////////
      /*
      GridBase *Fgrid = psi._grid;
      FieldD tmp2(Fgrid);
      FieldD tmp1(Fgrid);
      LinOpU.Op(src,tmp1);
      LinOpD.Op(src,tmp2);
      std::cout << " Double gauge field "<< norm2(FermOpD.Umu)<<std::endl;
      std::cout << " Single gauge field "<< norm2(FermOpF.Umu)<<std::endl;
      std::cout << " Test of operators "<<norm2(tmp1)<<std::endl;
      std::cout << " Test of operators "<<norm2(tmp2)<<std::endl;
      tmp1=tmp1-tmp2;
      std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl;
      */
      ////////////////////////////////////////////////////////////////////////////////////
      // Make a mixed precision conjugate gradient
      ////////////////////////////////////////////////////////////////////////////////////
      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
      MPCG(src,psi);
    }
  };
 }};
 int main(int argc, char **argv) {
  using namespace Grid;
  using namespace Grid::QCD;
@@ -42,7 +173,11 @@ int main(int argc, char **argv) {
   // Typedefs to simplify notation
  typedef WilsonImplR FermionImplPolicy;
  typedef MobiusFermionR FermionAction;
  typedef MobiusFermionF FermionActionF;
  typedef MobiusEOFAFermionR FermionEOFAAction;
  typedef MobiusEOFAFermionF FermionEOFAActionF;
  typedef typename FermionAction::FermionField FermionField;
  typedef typename FermionActionF::FermionField FermionFieldF;
  typedef Grid::XmlReader       Serialiser;
@@ -54,12 +189,12 @@ int main(int argc, char **argv) {
  MD.name    = std::string("Force Gradient");
  //  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; 
  //  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 8;
+  MD.MDsteps = 6;
  MD.trajL   = 1.0;
  HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 70;
+  HMCparams.StartTrajectory  = 590;
-  HMCparams.Trajectories     = 200;
+  HMCparams.Trajectories     = 1000;
  HMCparams.NoMetropolisUntil=  0;
  //  "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
  //  HMCparams.StartingType     =std::string("ColdStart");
@@ -97,42 +232,53 @@ int main(int argc, char **argv) {
  RealD b   = 1.0; 
  RealD c   = 0.0;
-  std::vector<Real> hasenbusch({ 0.1, 0.3 });
+  std::vector<Real> hasenbusch({ 0.1, 0.3, 0.6 });
  auto GridPtr   = TheHMC.Resources.GetCartesian();
  auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
  auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
  auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
  std::vector<int> latt  = GridDefaultLatt();
  std::vector<int> mpi   = GridDefaultMpi();
  std::vector<int> simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
  std::vector<int> simdD = GridDefaultSimd(Nd,vComplexD::Nsimd());
  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi);
  auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
  IwasakiGaugeActionR GaugeAction(beta);
  // temporarily need a gauge field
  LatticeGaugeField U(GridPtr);
  LatticeGaugeFieldF UF(GridPtrF);
  // These lines are unecessary if BC are all periodic
  std::vector<Complex> boundary = {1,1,1,-1};
  FermionAction::ImplParams Params(boundary);
  FermionActionF::ImplParams ParamsF(boundary);
  double ActionStoppingCondition     = 1e-10;
-  double DerivativeStoppingCondition = 1e-7;
+  double DerivativeStoppingCondition = 1e-6;
  double MaxCGIterations = 30000;
  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
  ////////////////////////////////////
  // Collect actions
  ////////////////////////////////////
  ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(4);
+  ActionLevel<HMCWrapper::Field> Level2(8);
  ////////////////////////////////////
  // Strange action
  ////////////////////////////////////
  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
  typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
  typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
-  //  FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
-  //  FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params);
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
  //  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
  //  Level1.push_back(&StrangePseudoFermion);
  // DJM: setup for EOFA ratio (Mobius)
  OneFlavourRationalParams OFRp;
@@ -143,9 +289,67 @@ int main(int argc, char **argv) {
  OFRp.degree   = 14;
  OFRp.precision= 50;
  MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
  MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
  MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> EOFA(Strange_Op_L, Strange_Op_R, ActionCG, OFRp, true);
+  MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
 #ifdef MIXED_PRECISION
  const int MX_inner = 1000;
  // Mixed precision EOFA
  LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
  LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
  LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF);
  LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF);
  MxPCG_EOFA ActionCGL(ActionStoppingCondition,
 		       MX_inner,
 		       MaxCGIterations,
 		       GridPtrF,
 		       FrbGridF,
 		       Strange_Op_LF,Strange_Op_L,
 		       Strange_LinOp_LF,Strange_LinOp_L);
  MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition,
 			   MX_inner,
 			   MaxCGIterations,
 			   GridPtrF,
 			   FrbGridF,
 			   Strange_Op_LF,Strange_Op_L,
 			   Strange_LinOp_LF,Strange_LinOp_L);
  MxPCG_EOFA ActionCGR(ActionStoppingCondition,
 		       MX_inner,
 		       MaxCGIterations,
 		       GridPtrF,
 		       FrbGridF,
 		       Strange_Op_RF,Strange_Op_R,
 		       Strange_LinOp_RF,Strange_LinOp_R);
  MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition,
 			   MX_inner,
 			   MaxCGIterations,
 			   GridPtrF,
 			   FrbGridF,
 			   Strange_Op_RF,Strange_Op_R,
 			   Strange_LinOp_RF,Strange_LinOp_R);
  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
    EOFA(Strange_Op_L, Strange_Op_R, 
 	 ActionCG, 
 	 ActionCGL, ActionCGR,
 	 DerivativeCGL, DerivativeCGR,
 	 OFRp, true);
 #else
  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
    EOFA(Strange_Op_L, Strange_Op_R, 
 	 ActionCG, ActionCG,
 	 DerivativeCG, DerivativeCG,
 	 OFRp, true);
 #endif
  Level1.push_back(&EOFA);
  ////////////////////////////////////
@@ -162,15 +366,62 @@ int main(int argc, char **argv) {
  }
  light_num.push_back(pv_mass);
  //////////////////////////////////////////////////////////////
  // Forced to replicate the MxPCG and DenominatorsF etc.. because
  // there is no convenient way to "Clone" physics params from double op
  // into single op for any operator pair.
  // Same issue prevents using MxPCG in the Heatbath step
  //////////////////////////////////////////////////////////////
  std::vector<FermionAction *> Numerators;
  std::vector<FermionAction *> Denominators;
  std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
  std::vector<MxPCG *> ActionMPCG;
  std::vector<MxPCG *> MPCG;
  std::vector<FermionActionF *> DenominatorsF;
  std::vector<LinearOperatorD *> LinOpD;
  std::vector<LinearOperatorF *> LinOpF; 
  for(int h=0;h<n_hasenbusch+1;h++){
    std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
    Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
    Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
 #ifdef MIXED_PRECISION
    ////////////////////////////////////////////////////////////////////////////
    // Mixed precision CG for 2f force
    ////////////////////////////////////////////////////////////////////////////
    DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF));
    LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
    LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
    MPCG.push_back(new MxPCG(DerivativeStoppingCondition,
 			     MX_inner,
 			     MaxCGIterations,
 			     GridPtrF,
 			     FrbGridF,
 			     *DenominatorsF[h],*Denominators[h],
 			     *LinOpF[h], *LinOpD[h]) );
    ActionMPCG.push_back(new MxPCG(ActionStoppingCondition,
 				   MX_inner,
 				   MaxCGIterations,
 				   GridPtrF,
 				   FrbGridF,
 				   *DenominatorsF[h],*Denominators[h],
 				   *LinOpF[h], *LinOpD[h]) );
    // Heatbath not mixed yet. As inverts numerators not so important as raised mass.
    Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG));
 #else
    ////////////////////////////////////////////////////////////////////////////
    // Standard CG for 2f force
    ////////////////////////////////////////////////////////////////////////////
    Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
 #endif
  }
  for(int h=0;h<n_hasenbusch+1;h++){
--- a/HMC/README
+++ b/HMC/README
@@ -1,14 +1,54 @@
 * Sign off 2+1f HMC with Hasenbush and strange RHMC
 - Wilson plaquette cross checked against CPS and literature GwilsonFnone
 - Timesteps matched
 - Use 16^3x32 
 ********************************************************************
-* From previous CPS runs:
+TODO: 
 ********************************************************************
 i) Got mixed precision in 2f and EOFA force and action solves.
   But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to
   reduce the number of solver and action objects. Needed ideally for the EOFA heatbath.
   15% perhaps
   Combine with 2x trajectory length?
 ii) Rational on EOFA HB  -- relax order
                         -- Test the approx as per David email
 Resume / roll.sh 
 ----------------------------------------------------------------
 - 16^3 Currently 10 traj per hour
 - EOFA use a different derivative solver from action solver
 - EOFA fix Davids hack to the SchurRedBlack guessing
 *** Reduce precision/tolerance  in EOFA with second CG param.                          (10% speed up)
 *** Force gradient - reduced precision solve for the gradient                          (4/3x speedup)
 *** Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
    -- Store the single prec action operator.
    -- Clone the gauge field from the operator function argument.
    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
 *** Mixed precision CG into EOFA portion         
 *** Further reduce precision in forces to 10^-6 ?
 *** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3.
 - Use mixed precision CG in HMC                           
 - SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead.
 - Or make an OperatorFunction for mixed precision as a wrapper
 ********************************************************************
 * Signed off 2+1f HMC with Hasenbush and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
 * Signed off 2+1f HMC with Hasenbush and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
 * Wilson plaquette cross checked against CPS and literature GwilsonFnone
 ********************************************************************
 ********************************************************************
 * RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs:
 ********************************************************************
 ****
 Strange (m=0.04)  has eigenspan 
 **** 
 16^3 done as 1+1+1 with separate PV's. 
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -370,5 +370,18 @@ int main(int argc,char **argv)
  tensorConvTest(rng, SpinMatrix);
  tensorConvTest(rng, SpinVector);
  {
    HMCparameters HMCparams;
    HMCparams.StartingType     =std::string("CheckpointStart");
    HMCparams.StartTrajectory  =7;
    HMCparams.Trajectories     =1000;
    HMCparams.NoMetropolisUntil=0;
    HMCparams.MD.name          =std::string("Force Gradient");
    HMCparams.MD.MDsteps       = 10;
    HMCparams.MD.trajL         = 1.0;
    XmlWriter HMCwr("HMCparameters.xml");
    write(HMCwr,"HMCparameters",HMCparams);
  }
  Grid_finalize();
 }