diff --git a/ChangeLog b/ChangeLog
index e69de29b..a0211af1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -0,0 +1,5 @@
+Version : 0.8.0
+
+- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
+- MPI and MPI3 comms optimisations for KNL and OPA finished
+- Half precision comms
diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h
index b86863b8..a1be48f4 100644
--- a/Grid/algorithms/LinearOperator.h
+++ b/Grid/algorithms/LinearOperator.h
@@ -178,7 +178,7 @@ namespace Grid {
     //////////////////////////////////////////////////////////
 
     template<class Field>
-      class SchurOperatorBase :  public LinearOperatorBase<Field> {
+    class SchurOperatorBase :  public LinearOperatorBase<Field> {
     public:
       virtual  RealD Mpc      (const Field &in, Field &out) =0;
       virtual  RealD MpcDag   (const Field &in, Field &out) =0;
@@ -211,10 +211,9 @@ namespace Grid {
       }
     };
     template<class Matrix,class Field>
-      class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
-    protected:
-      Matrix &_Mat;
+    class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
     public:
+      Matrix &_Mat;
       SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
       virtual  RealD Mpc      (const Field &in, Field &out) {
       Field tmp(in._grid);
diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h
index c8daa995..c1717e2a 100644
--- a/Grid/algorithms/iterative/ConjugateGradient.h
+++ b/Grid/algorithms/iterative/ConjugateGradient.h
@@ -89,6 +89,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
 
     // Check if guess is really REALLY good :)
     if (cp <= rsq) {
+      std::cout << GridLogMessage << "ConjugateGradient guess is converged already " << std::endl;
+      IterationsToComplete = 0;	
       return;
     }
 
@@ -104,7 +106,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
 
     SolverTimer.Start();
     int k;
-    for (k = 1; k <= MaxIterations*1000; k++) {
+    for (k = 1; k <= MaxIterations; k++) {
       c = cp;
 
       MatrixTimer.Start();
@@ -165,8 +167,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
         return;
       }
     }
-    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
-              << std::endl;
+    std::cout << GridLogMessage << "ConjugateGradient did NOT converge "<<k<<" / "<< MaxIterations<< std::endl;
 
     if (ErrorOnNoConverge) assert(0);
     IterationsToComplete = k;
diff --git a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
index c7332455..4c2cf12f 100644
--- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
+++ b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
@@ -30,8 +30,11 @@ Author: Christopher Kelly <ckelly@phys.columbia.edu>
 
 namespace Grid {
 
+
   //Mixed precision restarted defect correction CG
-  template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
+  template<class FieldD,class FieldF, 
+    typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
+    typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0> 
   class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
   public:                                                
     RealD   Tolerance;
@@ -50,7 +53,12 @@ namespace Grid {
     //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
     LinearFunction<FieldF> *guesser;
     
-    MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
+    MixedPrecisionConjugateGradient(RealD tol, 
+				    Integer maxinnerit, 
+				    Integer maxouterit, 
+				    GridBase* _sp_grid, 
+				    LinearOperatorBase<FieldF> &_Linop_f, 
+				    LinearOperatorBase<FieldD> &_Linop_d) :
       Linop_f(_Linop_f), Linop_d(_Linop_d),
       Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
       OuterLoopNormMult(100.), guesser(NULL){ };
@@ -149,6 +157,8 @@ namespace Grid {
     }
   };
 
+
+
 }
 
 #endif
diff --git a/Grid/algorithms/iterative/Deflation.h b/Grid/algorithms/iterative/Deflation.h
index ceba1c09..8df67b3c 100644
--- a/Grid/algorithms/iterative/Deflation.h
+++ b/Grid/algorithms/iterative/Deflation.h
@@ -35,7 +35,11 @@ class ZeroGuesser: public LinearFunction<Field> {
 public:
   virtual void operator()(const Field &src, Field &guess) { guess = zero; };
 };
-
+template<class Field>
+class DoNothingGuesser: public LinearFunction<Field> {
+public:
+  virtual void operator()(const Field &src, Field &guess) {  };
+};
 template<class Field>
 class SourceGuesser: public LinearFunction<Field> {
 public:
diff --git a/Grid/algorithms/iterative/SchurRedBlack.h b/Grid/algorithms/iterative/SchurRedBlack.h
index 6c1e1f1c..9f63e8c0 100644
--- a/Grid/algorithms/iterative/SchurRedBlack.h
+++ b/Grid/algorithms/iterative/SchurRedBlack.h
@@ -261,7 +261,7 @@ namespace Grid {
     }     
     
     /////////////////////////////////////////////////////////////
-    // Override in derived. Not virtual as template methods
+    // Override in derived. 
     /////////////////////////////////////////////////////////////
     virtual void RedBlackSource  (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)                =0;
     virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)          =0;
diff --git a/Grid/communicator/SharedMemory.h b/Grid/communicator/SharedMemory.h
index 9f6b1a25..63cfc7a6 100644
--- a/Grid/communicator/SharedMemory.h
+++ b/Grid/communicator/SharedMemory.h
@@ -103,6 +103,8 @@ class GlobalSharedMemory {
   //////////////////////////////////////////////////////////////////////////////////////
   static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
   static void OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
   ///////////////////////////////////////////////////
   // Provide shared memory facilities off comm world
   ///////////////////////////////////////////////////
diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc
index d3b094ad..b2a6ab53 100644
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@@ -132,7 +132,22 @@ int Log2Size(int TwoToPower,int MAXLOG2)
 }
 void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
 {
-#ifdef HYPERCUBE
+  //////////////////////////////////////////////////////////////////////////////
+  // Look and see if it looks like an HPE 8600 based on hostname conventions
+  //////////////////////////////////////////////////////////////////////////////
+  const int namelen = _POSIX_HOST_NAME_MAX;
+  char name[namelen];
+  int R;
+  int I;
+  int N;
+  gethostname(name,namelen);
+  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
+
+  if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm);
+  else         OptimalCommunicatorSharedMemory(processors,optimal_comm);
+}
+void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+{
   ////////////////////////////////////////////////////////////////
   // Assert power of two shm_size.
   ////////////////////////////////////////////////////////////////
@@ -253,7 +268,9 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
   /////////////////////////////////////////////////////////////////
   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
   assert(ierr==0);
-#else 
+}
+void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
+{
   ////////////////////////////////////////////////////////////////
   // Assert power of two shm_size.
   ////////////////////////////////////////////////////////////////
@@ -306,7 +323,6 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,
   /////////////////////////////////////////////////////////////////
   int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
   assert(ierr==0);
-#endif
 }
 ////////////////////////////////////////////////////////////////////////////////////////////
 // SHMGET
@@ -337,7 +353,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
         int errsv = errno;
         printf("Errno %d\n",errsv);
         printf("key   %d\n",key);
-        printf("size  %lld\n",size);
+        printf("size  %ld\n",size);
         printf("flags %d\n",flags);
         perror("shmget");
         exit(1);
diff --git a/Grid/qcd/action/fermion/DomainWallFermion.h b/Grid/qcd/action/fermion/DomainWallFermion.h
index 4de9ccf0..b6824dc4 100644
--- a/Grid/qcd/action/fermion/DomainWallFermion.h
+++ b/Grid/qcd/action/fermion/DomainWallFermion.h
@@ -43,7 +43,7 @@ namespace Grid {
      INHERIT_IMPL_TYPES(Impl);
     public:
 
-      void FreePropagator(const FermionField &in,FermionField &out,RealD mass, std::vector<double> twist, bool fiveD) {
+      void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary, std::vector<double> twist, bool fiveD) {
 	FermionField in_k(in._grid);
 	FermionField prop_k(in._grid);
 
@@ -55,15 +55,20 @@ namespace Grid {
 	FermionField in_buf(in._grid); in_buf = zero;
 	Scalar ci(0.0,1.0);
 	assert(twist.size() == Nd);//check that twist is Nd
+	assert(boundary.size() == Nd);//check that boundary conditions is Nd
 	int shift = 0;
 	if(fiveD) shift = 1;
 	for(unsigned int nu = 0; nu < Nd; nu++)
 	{
 	  // Shift coordinate lattice index by 1 to account for 5th dimension.
           LatticeCoordinate(coor, nu + shift);
-	  ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu+shift])));
+	  double boundary_phase = ::acos(real(boundary[nu]));
+	  ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift])));
+	  //momenta for propagator shifted by twist+boundary
+	  twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
 	}
-	in_buf = exp(Scalar(2.0*M_PI)*ci*ph*(-1.0))*in;
+	in_buf = exp(ci*ph*(-1.0))*in;
+
 
 	if(fiveD){//FFT only on temporal and spatial dimensions
           std::vector<int> mask(Nd+1,1); mask[0] = 0;
@@ -76,25 +81,28 @@ namespace Grid {
           this->MomentumSpacePropagatorHt(prop_k,in_k,mass,twist);
 	  theFFT.FFT_all_dim(out,prop_k,FFT::backward);
         }
-
 	//phase for boundary condition
-	out = out * exp(Scalar(2.0*M_PI)*ci*ph);
+	out = out * exp(ci*ph);
       };
 
-      virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
+      virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
         bool fiveD = true; //5d propagator by default
-        FreePropagator(in,out,mass,twist,fiveD);
+	FreePropagator(in,out,mass,boundary,twist,fiveD);
       };
 
       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass, bool fiveD) {
 	std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
-        FreePropagator(in,out,mass,twist,fiveD);
+	std::vector<Complex> boundary;
+	for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
+	FreePropagator(in,out,mass,boundary,twist,fiveD);
       };
 
       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
         bool fiveD = true; //5d propagator by default
-	std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
-        FreePropagator(in,out,mass,twist,fiveD);
+	std::vector<double> twist(Nd,0.0); //default: twist angle 0
+	std::vector<Complex> boundary;
+	for(int i=0;i<Nd;i++) boundary.push_back(1); //default: periodic boundary conditions
+	FreePropagator(in,out,mass,boundary,twist,fiveD);
       };
 
       virtual void   Instantiatable(void) {};
diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h
index 436808e6..221f2bfd 100644
--- a/Grid/qcd/action/fermion/FermionOperator.h
+++ b/Grid/qcd/action/fermion/FermionOperator.h
@@ -96,7 +96,7 @@ namespace Grid {
 
       virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
 
-      virtual void  FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
+      virtual void  FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<Complex> boundary,std::vector<double> twist) {
 	FFT theFFT((GridCartesian *) in._grid);
 
 	FermionField in_k(in._grid);
@@ -108,24 +108,31 @@ namespace Grid {
 	FermionField in_buf(in._grid); in_buf = zero;
 	Scalar ci(0.0,1.0);
 	assert(twist.size() == Nd);//check that twist is Nd
+	assert(boundary.size() == Nd);//check that boundary conditions is Nd
 	for(unsigned int nu = 0; nu < Nd; nu++)
 	{
           LatticeCoordinate(coor, nu);
-	  ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu])));
+	  double boundary_phase = ::acos(real(boundary[nu]));
+	  ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu])));
+	  //momenta for propagator shifted by twist+boundary
+	  twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI));
 	}
-	in_buf = exp(Scalar(-2.0*M_PI)*ci*ph)*in;
+	in_buf = exp(ci*ph*(-1.0))*in;
 
 	theFFT.FFT_all_dim(in_k,in_buf,FFT::forward);
         this->MomentumSpacePropagator(prop_k,in_k,mass,twist);
 	theFFT.FFT_all_dim(out,prop_k,FFT::backward);
 
 	//phase for boundary condition
-	out = out * exp(Scalar(2.0*M_PI)*ci*ph);
+	out = out * exp(ci*ph);
 
       };
+
       virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
+		std::vector<Complex> boundary;
+		for(int i=0;i<Nd;i++) boundary.push_back(1);//default: periodic boundary conditions
 		std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
-	        FreePropagator(in,out,mass,twist);
+	        FreePropagator(in,out,mass,boundary,twist);
       };
 
       ///////////////////////////////////////////////
diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
index 54a2c594..25285565 100644
--- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
+++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h
@@ -58,13 +58,29 @@ namespace QCD{
       bool use_heatbath_forecasting;
       AbstractEOFAFermion<Impl>& Lop; // the basic LH operator
       AbstractEOFAFermion<Impl>& Rop; // the basic RH operator
-      SchurRedBlackDiagMooeeSolve<FermionField> Solver;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverHB;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverL;
+      SchurRedBlackDiagMooeeSolve<FermionField> SolverR;
+      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverL;
+      SchurRedBlackDiagMooeeSolve<FermionField> DerivativeSolverR;
       FermionField Phi; // the pseudofermion field for this trajectory
 
     public:
-      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, AbstractEOFAFermion<Impl>& _Rop,
-        OperatorFunction<FermionField>& S, Params& p, bool use_fc=false) : Lop(_Lop), Rop(_Rop), 
-        Solver(S, false, true), Phi(_Lop.FermionGrid()), param(p), use_heatbath_forecasting(use_fc)
+      ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion<Impl>& _Lop, 
+					      AbstractEOFAFermion<Impl>& _Rop,
+					      OperatorFunction<FermionField>& HeatbathCG, 
+					      OperatorFunction<FermionField>& ActionCGL, OperatorFunction<FermionField>& ActionCGR, 
+					      OperatorFunction<FermionField>& DerivCGL , OperatorFunction<FermionField>& DerivCGR, 
+					      Params& p, 
+					      bool use_fc=false) : 
+        Lop(_Lop), 
+	Rop(_Rop), 
+	SolverHB(HeatbathCG,false,true),
+	SolverL(ActionCGL, false, true), SolverR(ActionCGR, false, true), 
+	DerivativeSolverL(DerivCGL, false, true), DerivativeSolverR(DerivCGR, false, true), 
+	Phi(_Lop.FermionGrid()), 
+	param(p), 
+        use_heatbath_forecasting(use_fc)
       {
         AlgRemez remez(param.lo, param.hi, param.precision);
 
@@ -98,6 +114,9 @@ namespace QCD{
       // We generate a Gaussian noise vector \eta, and then compute
       //  \Phi = M_{\rm EOFA}^{-1/2} * \eta
       // using a rational approximation to the inverse square root
+      //
+      // As a check of rational require \Phi^dag M_{EOFA} \Phi == eta^dag M^-1/2^dag M M^-1/2 eta = eta^dag eta
+      //
       virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG)
       {
         Lop.ImportGauge(U);
@@ -118,7 +137,6 @@ namespace QCD{
         RealD scale = std::sqrt(0.5);
         gaussian(pRNG,eta);
         eta = eta * scale;
-        printf("Heatbath source vector: <\\eta|\\eta> = %1.15e\n", norm2(eta));
 
         // \Phi = ( \alpha_{0} + \sum_{k=1}^{N_{p}} \alpha_{l} * \gamma_{l} ) * \eta
         RealD N(PowerNegHalf.norm);
@@ -139,11 +157,11 @@ namespace QCD{
           if(use_heatbath_forecasting){ // Forecast CG guess using solutions from previous poles
             Lop.Mdag(CG_src, Forecast_src);
             CG_soln = Forecast(Lop, Forecast_src, prev_solns);
-            Solver(Lop, CG_src, CG_soln);
+            SolverHB(Lop, CG_src, CG_soln);
             prev_solns.push_back(CG_soln);
           } else {
             CG_soln = zero; // Just use zero as the initial guess
-            Solver(Lop, CG_src, CG_soln);
+            SolverHB(Lop, CG_src, CG_soln);
           }
           Lop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
           tmp[1] = tmp[1] + ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Lop.k ) * tmp[0];
@@ -166,11 +184,11 @@ namespace QCD{
           if(use_heatbath_forecasting){
             Rop.Mdag(CG_src, Forecast_src);
             CG_soln = Forecast(Rop, Forecast_src, prev_solns);
-            Solver(Rop, CG_src, CG_soln);
+            SolverHB(Rop, CG_src, CG_soln);
             prev_solns.push_back(CG_soln);
           } else {
             CG_soln = zero;
-            Solver(Rop, CG_src, CG_soln);
+            SolverHB(Rop, CG_src, CG_soln);
           }
           Rop.Dtilde(CG_soln, tmp[0]); // We actually solved Cayley preconditioned system: transform back
           tmp[1] = tmp[1] - ( PowerNegHalf.residues[k]*gamma_l*gamma_l*Rop.k ) * tmp[0];
@@ -182,8 +200,47 @@ namespace QCD{
         // Reset shift coefficients for energy and force evals
         Lop.RefreshShiftCoefficients(0.0);
         Rop.RefreshShiftCoefficients(-1.0);
+
+	// Bounds check
+	RealD EtaDagEta = norm2(eta);
+	//	RealD PhiDagMPhi= norm2(eta);
+
       };
 
+      void Meofa(const GaugeField& U,const FermionField &phi, FermionField & Mphi) 
+      {
+#if 0
+        Lop.ImportGauge(U);
+        Rop.ImportGauge(U);
+
+        FermionField spProj_Phi(Lop.FermionGrid());
+	FermionField mPhi(Lop.FermionGrid());
+        std::vector<FermionField> tmp(2, Lop.FermionGrid());
+	mPhi = phi;
+	
+        // LH term: S = S - k <\Phi| P_{-} \Omega_{-}^{\dagger} H(mf)^{-1} \Omega_{-} P_{-} |\Phi>
+        spProj(Phi, spProj_Phi, -1, Lop.Ls);
+        Lop.Omega(spProj_Phi, tmp[0], -1, 0);
+        G5R5(tmp[1], tmp[0]);
+        tmp[0] = zero;
+        SolverL(Lop, tmp[1], tmp[0]);
+        Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
+        Lop.Omega(tmp[1], tmp[0], -1, 1);
+	mPhi = mPhi -  Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
+
+        // RH term: S = S + k <\Phi| P_{+} \Omega_{+}^{\dagger} ( H(mb)
+        //               - \Delta_{+}(mf,mb) P_{+} )^{-1} \Omega_{-} P_{-} |\Phi>
+        spProj(Phi, spProj_Phi, 1, Rop.Ls);
+        Rop.Omega(spProj_Phi, tmp[0], 1, 0);
+        G5R5(tmp[1], tmp[0]);
+        tmp[0] = zero;
+        SolverR(Rop, tmp[1], tmp[0]);
+        Rop.Dtilde(tmp[0], tmp[1]);
+        Rop.Omega(tmp[1], tmp[0], 1, 1);
+        action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
+#endif
+      }
+
       // EOFA action: see Eqn. (10) of arXiv:1706.05843
       virtual RealD S(const GaugeField& U)
       {
@@ -201,7 +258,7 @@ namespace QCD{
         Lop.Omega(spProj_Phi, tmp[0], -1, 0);
         G5R5(tmp[1], tmp[0]);
         tmp[0] = zero;
-        Solver(Lop, tmp[1], tmp[0]);
+        SolverL(Lop, tmp[1], tmp[0]);
         Lop.Dtilde(tmp[0], tmp[1]); // We actually solved Cayley preconditioned system: transform back
         Lop.Omega(tmp[1], tmp[0], -1, 1);
         action -= Lop.k * innerProduct(spProj_Phi, tmp[0]).real();
@@ -212,7 +269,7 @@ namespace QCD{
         Rop.Omega(spProj_Phi, tmp[0], 1, 0);
         G5R5(tmp[1], tmp[0]);
         tmp[0] = zero;
-        Solver(Rop, tmp[1], tmp[0]);
+        SolverR(Rop, tmp[1], tmp[0]);
         Rop.Dtilde(tmp[0], tmp[1]);
         Rop.Omega(tmp[1], tmp[0], 1, 1);
         action += Rop.k * innerProduct(spProj_Phi, tmp[0]).real();
@@ -245,7 +302,7 @@ namespace QCD{
         Lop.Omega(spProj_Phi, Omega_spProj_Phi, -1, 0);
         G5R5(CG_src, Omega_spProj_Phi);
         spProj_Phi = zero;
-        Solver(Lop, CG_src, spProj_Phi);
+        DerivativeSolverL(Lop, CG_src, spProj_Phi);
         Lop.Dtilde(spProj_Phi, Chi);
         G5R5(g5_R5_Chi, Chi);
         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
@@ -257,7 +314,7 @@ namespace QCD{
         Rop.Omega(spProj_Phi, Omega_spProj_Phi, 1, 0);
         G5R5(CG_src, Omega_spProj_Phi);
         spProj_Phi = zero;
-        Solver(Rop, CG_src, spProj_Phi);
+        DerivativeSolverR(Rop, CG_src, spProj_Phi);
         Rop.Dtilde(spProj_Phi, Chi);
         G5R5(g5_R5_Chi, Chi);
         Lop.MDeriv(force, g5_R5_Chi, Chi, DaggerNo);
diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
index 0f57fe9c..e9a8853a 100644
--- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
+++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h
@@ -46,6 +46,7 @@ namespace Grid{
 
       OperatorFunction<FermionField> &DerivativeSolver;
       OperatorFunction<FermionField> &ActionSolver;
+      OperatorFunction<FermionField> &HeatbathSolver;
 
       FermionField PhiOdd;   // the pseudo fermion field for this trajectory
       FermionField PhiEven;  // the pseudo fermion field for this trajectory
@@ -54,11 +55,18 @@ namespace Grid{
       TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
                                                 FermionOperator<Impl>  &_DenOp, 
                                                 OperatorFunction<FermionField> & DS,
-                                                OperatorFunction<FermionField> & AS) :
+                                                OperatorFunction<FermionField> & AS ) : 
+      TwoFlavourEvenOddRatioPseudoFermionAction(_NumOp,_DenOp, DS,AS,AS) {};
+
+      TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl>  &_NumOp, 
+                                                FermionOperator<Impl>  &_DenOp, 
+                                                OperatorFunction<FermionField> & DS,
+                                                OperatorFunction<FermionField> & AS, OperatorFunction<FermionField> & HS) :
       NumOp(_NumOp), 
       DenOp(_DenOp), 
       DerivativeSolver(DS), 
       ActionSolver(AS),
+      HeatbathSolver(HS),
       PhiEven(_NumOp.FermionRedBlackGrid()),
       PhiOdd(_NumOp.FermionRedBlackGrid()) 
         {
@@ -111,7 +119,7 @@ namespace Grid{
         // Odd det factors
         Mpc.MpcDag(etaOdd,PhiOdd);
         tmp=zero;
-        ActionSolver(Vpc,PhiOdd,tmp);
+        HeatbathSolver(Vpc,PhiOdd,tmp);
         Vpc.Mpc(tmp,PhiOdd);            
 
         // Even det factors
diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h
index 7d65ed60..9d4376fc 100644
--- a/Grid/qcd/hmc/integrators/Integrator.h
+++ b/Grid/qcd/hmc/integrators/Integrator.h
@@ -54,7 +54,7 @@ public:
 
   template <class ReaderClass, typename std::enable_if<isReader<ReaderClass>::value, int >::type = 0 >
   IntegratorParameters(ReaderClass & Reader){
-    std::cout << "Reading integrator\n";
+    std::cout << GridLogMessage << "Reading integrator\n";
     read(Reader, "Integrator", *this);
   }
 
@@ -132,7 +132,7 @@ class Integrator {
       double end_full = usecond();
       double time_full  = (end_full - start_full) / 1e3;
       double time_force = (end_force - start_force) / 1e3;
-      std::cout << GridLogIntegrator << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl;
+      std::cout << GridLogMessage << "["<<level<<"]["<<a<<"] P update elapsed time: " << time_full << " ms (force: " << time_force << " ms)"  << std::endl;
     }
 
     // Force from the other representations
@@ -237,8 +237,7 @@ class Integrator {
       for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
         // get gauge field from the SmearingPolicy and
         // based on the boolean is_smeared in actionID
-        Field& Us =
-            Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
+        Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
         as[level].actions.at(actionID)->refresh(Us, pRNG);
       }
 
@@ -251,13 +250,11 @@ class Integrator {
   // over the representations
   struct _S {
     template <class FieldType, class Repr>
-    void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
-                    int level, RealD& H) {
+    void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, int level, RealD& H) {
       
       for (int a = 0; a < repr_set.size(); ++a) {
         RealD Hterm = repr_set.at(a)->S(Rep.U);
-        std::cout << GridLogMessage << "S Level " << level << " term " << a
-                  << " H Hirep = " << Hterm << std::endl;
+        std::cout << GridLogMessage << "S Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl;
         H += Hterm;
 
       }
@@ -267,9 +264,10 @@ class Integrator {
   // Calculate action
   RealD S(Field& U) {  // here also U not used
 
+    std::cout << GridLogIntegrator << "Integrator action\n";
 
     RealD H = - FieldImplementation::FieldSquareNorm(P)/HMC_MOMENTUM_DENOMINATOR; // - trace (P*P)/denom
-    std::cout << " Momentum hamiltonian "<< -H<<std::endl;
+
     RealD Hterm;
 
     // Actions
@@ -278,9 +276,9 @@ class Integrator {
         // get gauge field from the SmearingPolicy and
         // based on the boolean is_smeared in actionID
         Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
+        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
         Hterm = as[level].actions.at(actionID)->S(Us);
-        std::cout << GridLogMessage << "S Level " << level << " term "
-                  << actionID << " H = " << Hterm << std::endl;
+        std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
         H += Hterm;
       }
       as[level].apply(S_hireps, Representations, level, H);
@@ -305,8 +303,7 @@ class Integrator {
     // Check the clocks all match on all levels
     for (int level = 0; level < as.size(); ++level) {
       assert(fabs(t_U - t_P[level]) < 1.0e-6);  // must be the same
-      std::cout << GridLogIntegrator << " times[" << level
-                << "]= " << t_P[level] << " " << t_U << std::endl;
+      std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl;
     }
 
     // and that we indeed got to the end of the trajectory
diff --git a/Grid/qcd/hmc/integrators/Integrator_algorithm.h b/Grid/qcd/hmc/integrators/Integrator_algorithm.h
index 13a37aeb..47574026 100644
--- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h
+++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h
@@ -231,8 +231,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
     Field Pfg(U._grid);
     Ufg = U;
     Pfg = zero;
-    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep
-              << std::endl;
+    std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep << std::endl;
     // prepare_fg; no prediction/result cache for now
     // could relax CG stopping conditions for the
     // derivatives in the small step since the force gets multiplied by
@@ -271,8 +270,7 @@ class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy,
         this->step(U, level + 1, first_step, 0);
       }
 
-      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps),
-                        (1.0 - 2.0 * lambda) * eps);
+      this->FG_update_P(U, level, 2 * Chi / ((1.0 - 2.0 * lambda) * eps), (1.0 - 2.0 * lambda) * eps);
 
       if (level == fl) {  // lowest level
         this->update_U(U, 0.5 * eps);
diff --git a/Grid/qcd/utils/GaugeFix.h b/Grid/qcd/utils/GaugeFix.h
index 2b0cd7f2..67f84f14 100644
--- a/Grid/qcd/utils/GaugeFix.h
+++ b/Grid/qcd/utils/GaugeFix.h
@@ -31,6 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 namespace Grid {
 namespace QCD {
 
+
 template <class Gimpl> 
 class FourierAcceleratedGaugeFixer  : public Gimpl {
  public:
@@ -45,18 +46,21 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
       A[mu] = Ta(U[mu]) * cmi;
     }
   }
-  static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) {
+  static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu,int orthog) {
     dmuAmu=zero;
     for(int mu=0;mu<Nd;mu++){
-      dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
+      if ( mu != orthog ) {
+	dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1);
+      }
     }
   }  
-  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) {
+
+  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
     GridBase *grid = Umu._grid;
     GaugeMat xform(grid);
-    SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier);
+    SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog);
   }
-  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) {
+  static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1) {
 
     GridBase *grid = Umu._grid;
 
@@ -71,15 +75,29 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 
     GaugeMat dmuAmu(grid);
 
+    {
+      Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu);
+      Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu); 
+      if( (orthog>=0) && (orthog<Nd) ){
+	std::cout << GridLogMessage << " Gauge fixing to Coulomb gauge time="<<orthog<< " plaq= "<<plaq<<" link trace = "<<link_trace<<  std::endl;
+      } else { 
+	std::cout << GridLogMessage << " Gauge fixing to Landau gauge plaq= "<<plaq<<" link trace = "<<link_trace<<  std::endl;
+      }
+    }
     for(int i=0;i<maxiter;i++){
 
       for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu);
 
       if ( Fourier==false ) { 
-	trG = SteepestDescentStep(U,xform,alpha,dmuAmu);
+	trG = SteepestDescentStep(U,xform,alpha,dmuAmu,orthog);
       } else { 
-	trG = FourierAccelSteepestDescentStep(U,xform,alpha,dmuAmu);
+	trG = FourierAccelSteepestDescentStep(U,xform,alpha,dmuAmu,orthog);
       }
+
+      //      std::cout << GridLogMessage << "trG   "<< trG<< std::endl;
+      //      std::cout << GridLogMessage << "xform "<< norm2(xform)<< std::endl;
+      //      std::cout << GridLogMessage << "dmuAmu "<< norm2(dmuAmu)<< std::endl;
+
       for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu);
       // Monitor progress and convergence test 
       // infrequently to minimise cost overhead
@@ -106,14 +124,14 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
       }
     }
   };
-  static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu) {
+  static Real SteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
     GridBase *grid = U[0]._grid;
 
     std::vector<GaugeMat> A(Nd,grid);
     GaugeMat g(grid);
 
     GaugeLinkToLieAlgebraField(U,A);
-    ExpiAlphaDmuAmu(A,g,alpha,dmuAmu);
+    ExpiAlphaDmuAmu(A,g,alpha,dmuAmu,orthog);
 
 
     Real vol = grid->gSites();
@@ -125,7 +143,7 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
     return trG;
   }
 
-  static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu) {
+  static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,GaugeMat &xform,Real & alpha, GaugeMat & dmuAmu,int orthog) {
 
     GridBase *grid = U[0]._grid;
 
@@ -144,31 +162,41 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
 
     GaugeLinkToLieAlgebraField(U,A);
 
-    DmuAmu(A,dmuAmu);
+    DmuAmu(A,dmuAmu,orthog);
 
-    theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward);
+    std::vector<int> mask(Nd,1);
+    for(int mu=0;mu<Nd;mu++) if (mu==orthog) mask[mu]=0;
+    theFFT.FFT_dim_mask(dmuAmu_p,dmuAmu,mask,FFT::forward);
 
     //////////////////////////////////
     // Work out Fp = psq_max/ psq...
+    // Avoid singularities in Fp
     //////////////////////////////////
     std::vector<int> latt_size = grid->GlobalDimensions();
     std::vector<int> coor(grid->_ndimension,0);
     for(int mu=0;mu<Nd;mu++) {
-
-      Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
-      LatticeCoordinate(pmu,mu);
-      pmu = TwoPiL * pmu ;
-      psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5); 
+      if ( mu != orthog ) { 
+	Real TwoPiL =  M_PI * 2.0/ latt_size[mu];
+	LatticeCoordinate(pmu,mu);
+	pmu = TwoPiL * pmu ;
+	psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5); 
+      }
     }
 
     Complex psqMax(16.0);
     Fp =  psqMax*one/psq;
 
-    pokeSite(TComplex(1.0),Fp,coor);
-
+    pokeSite(TComplex(16.0),Fp,coor);
+    if( (orthog>=0) && (orthog<Nd) ){
+      for(int t=0;t<grid->GlobalDimensions()[orthog];t++){
+	coor[orthog]=t;
+	pokeSite(TComplex(16.0),Fp,coor);
+      }
+    }
+    
     dmuAmu_p  = dmuAmu_p * Fp; 
 
-    theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward);
+    theFFT.FFT_dim_mask(dmuAmu,dmuAmu_p,mask,FFT::backward);
 
     GaugeMat ciadmam(grid);
     Complex cialpha(0.0,-alpha);
@@ -183,11 +211,11 @@ class FourierAcceleratedGaugeFixer  : public Gimpl {
     return trG;
   }
 
-  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) {
+  static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu,int orthog) {
     GridBase *grid = g._grid;
     Complex cialpha(0.0,-alpha);
     GaugeMat ciadmam(grid);
-    DmuAmu(A,dmuAmu);
+    DmuAmu(A,dmuAmu,orthog);
     ciadmam = dmuAmu*cialpha;
     SU<Nc>::taExp(ciadmam,g);
   }  
diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc
index a6e42ceb..61b06829 100644
--- a/HMC/Mobius2p1fEOFA.cc
+++ b/HMC/Mobius2p1fEOFA.cc
@@ -30,6 +30,137 @@ directory
 /*  END LEGAL */
 #include <Grid/Grid.h>
 
+#define MIXED_PRECISION
+
+namespace Grid{ 
+  namespace QCD{
+
+  /*
+   * Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
+   *    -- Store the single prec action operator.
+   *    -- Clone the gauge field from the operator function argument.
+   *    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
+   */
+
+  template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class  SchurOperatorF> 
+  class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
+  public:
+    typedef typename FermionOperatorD::FermionField FieldD;
+    typedef typename FermionOperatorF::FermionField FieldF;
+
+    RealD   Tolerance;
+    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
+    Integer MaxInnerIterations;
+    Integer MaxOuterIterations;
+    GridBase* SinglePrecGrid4; //Grid for single-precision fields
+    GridBase* SinglePrecGrid5; //Grid for single-precision fields
+    RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
+
+    FermionOperatorF &FermOpF;
+    FermionOperatorD &FermOpD;;
+    SchurOperatorF &LinOpF;
+    SchurOperatorD &LinOpD;
+
+    Integer TotalInnerIterations; //Number of inner CG iterations
+    Integer TotalOuterIterations; //Number of restarts
+    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
+
+    MixedPrecisionConjugateGradientOperatorFunction(RealD tol, 
+						    Integer maxinnerit, 
+						    Integer maxouterit, 
+						    GridBase* _sp_grid4, 
+						    GridBase* _sp_grid5, 
+						    FermionOperatorF &_FermOpF,
+						    FermionOperatorD &_FermOpD,
+						    SchurOperatorF   &_LinOpF,
+						    SchurOperatorD   &_LinOpD): 
+      LinOpF(_LinOpF),
+      LinOpD(_LinOpD),
+      FermOpF(_FermOpF),
+      FermOpD(_FermOpD),
+      Tolerance(tol), 
+      InnerTolerance(tol), 
+      MaxInnerIterations(maxinnerit), 
+      MaxOuterIterations(maxouterit), 
+      SinglePrecGrid4(_sp_grid4),
+      SinglePrecGrid5(_sp_grid5),
+      OuterLoopNormMult(100.) 
+    { 
+      /* Debugging instances of objects; references are stored
+      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
+      std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
+      */
+    };
+
+    void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
+
+      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
+
+      SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
+      
+      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
+      //      std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
+      // Assumption made in code to extract gauge field
+      // We could avoid storing LinopD reference alltogether ?
+      assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Must snarf a single precision copy of the gauge field in Linop_d argument
+      ////////////////////////////////////////////////////////////////////////////////////
+      typedef typename FermionOperatorF::GaugeField GaugeFieldF;
+      typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
+      typedef typename FermionOperatorD::GaugeField GaugeFieldD;
+      typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
+
+      GridBase * GridPtrF = SinglePrecGrid4;
+      GridBase * GridPtrD = FermOpD.Umu._grid;
+      GaugeFieldF     U_f  (GridPtrF);
+      GaugeLinkFieldF Umu_f(GridPtrF);
+      //      std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
+      //      std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Moving this to a Clone method of fermion operator would allow to duplicate the 
+      // physics parameters and decrease gauge field copies
+      ////////////////////////////////////////////////////////////////////////////////////
+      GaugeLinkFieldD Umu_d(GridPtrD);
+      for(int mu=0;mu<Nd*2;mu++){ 
+	Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
+	precisionChange(Umu_f,Umu_d);
+	PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
+      }
+      pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
+      pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Could test to make sure that LinOpF and LinOpD agree to single prec?
+      ////////////////////////////////////////////////////////////////////////////////////
+      /*
+      GridBase *Fgrid = psi._grid;
+      FieldD tmp2(Fgrid);
+      FieldD tmp1(Fgrid);
+      LinOpU.Op(src,tmp1);
+      LinOpD.Op(src,tmp2);
+      std::cout << " Double gauge field "<< norm2(FermOpD.Umu)<<std::endl;
+      std::cout << " Single gauge field "<< norm2(FermOpF.Umu)<<std::endl;
+      std::cout << " Test of operators "<<norm2(tmp1)<<std::endl;
+      std::cout << " Test of operators "<<norm2(tmp2)<<std::endl;
+      tmp1=tmp1-tmp2;
+      std::cout << " Test of operators diff "<<norm2(tmp1)<<std::endl;
+      */
+
+      ////////////////////////////////////////////////////////////////////////////////////
+      // Make a mixed precision conjugate gradient
+      ////////////////////////////////////////////////////////////////////////////////////
+      MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
+      std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
+      MPCG(src,psi);
+    }
+  };
+}};
+
 int main(int argc, char **argv) {
   using namespace Grid;
   using namespace Grid::QCD;
@@ -42,7 +173,11 @@ int main(int argc, char **argv) {
    // Typedefs to simplify notation
   typedef WilsonImplR FermionImplPolicy;
   typedef MobiusFermionR FermionAction;
+  typedef MobiusFermionF FermionActionF;
+  typedef MobiusEOFAFermionR FermionEOFAAction;
+  typedef MobiusEOFAFermionF FermionEOFAActionF;
   typedef typename FermionAction::FermionField FermionField;
+  typedef typename FermionActionF::FermionField FermionFieldF;
 
   typedef Grid::XmlReader       Serialiser;
   
@@ -54,12 +189,12 @@ int main(int argc, char **argv) {
   MD.name    = std::string("Force Gradient");
   //  typedef GenericHMCRunner<MinimumNorm2> HMCWrapper; 
   //  MD.name    = std::string("MinimumNorm2");
-  MD.MDsteps = 8;
+  MD.MDsteps = 6;
   MD.trajL   = 1.0;
   
   HMCparameters HMCparams;
-  HMCparams.StartTrajectory  = 70;
-  HMCparams.Trajectories     = 200;
+  HMCparams.StartTrajectory  = 590;
+  HMCparams.Trajectories     = 1000;
   HMCparams.NoMetropolisUntil=  0;
   //  "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
   //  HMCparams.StartingType     =std::string("ColdStart");
@@ -97,42 +232,53 @@ int main(int argc, char **argv) {
   RealD b   = 1.0; 
   RealD c   = 0.0;
 
-  std::vector<Real> hasenbusch({ 0.1, 0.3 });
+  std::vector<Real> hasenbusch({ 0.1, 0.3, 0.6 });
 
   auto GridPtr   = TheHMC.Resources.GetCartesian();
   auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
   auto FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
   auto FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
 
+  std::vector<int> latt  = GridDefaultLatt();
+  std::vector<int> mpi   = GridDefaultMpi();
+  std::vector<int> simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
+  std::vector<int> simdD = GridDefaultSimd(Nd,vComplexD::Nsimd());
+  auto GridPtrF   = SpaceTimeGrid::makeFourDimGrid(latt,simdF,mpi);
+  auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
+  auto FGridF     = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
+  auto FrbGridF   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
+
   IwasakiGaugeActionR GaugeAction(beta);
 
   // temporarily need a gauge field
   LatticeGaugeField U(GridPtr);
+  LatticeGaugeFieldF UF(GridPtrF);
 
   // These lines are unecessary if BC are all periodic
   std::vector<Complex> boundary = {1,1,1,-1};
   FermionAction::ImplParams Params(boundary);
+  FermionActionF::ImplParams ParamsF(boundary);
   
   double ActionStoppingCondition     = 1e-10;
-  double DerivativeStoppingCondition = 1e-7;
+  double DerivativeStoppingCondition = 1e-6;
   double MaxCGIterations = 30000;
-  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
-  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
 
   ////////////////////////////////////
   // Collect actions
   ////////////////////////////////////
   ActionLevel<HMCWrapper::Field> Level1(1);
-  ActionLevel<HMCWrapper::Field> Level2(4);
+  ActionLevel<HMCWrapper::Field> Level2(8);
 
   ////////////////////////////////////
   // Strange action
   ////////////////////////////////////
+  typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
+  typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
+  typedef SchurDiagMooeeOperator<FermionEOFAActionF,FermionFieldF> LinearOperatorEOFAF;
+  typedef SchurDiagMooeeOperator<FermionEOFAAction ,FermionField > LinearOperatorEOFAD;
 
-  //  FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
-  //  FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass,  M5,b,c, Params);
-  //  OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermion(StrangePauliVillarsOp,StrangeOp,OFRp);
-  //  Level1.push_back(&StrangePseudoFermion);
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
+  typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusEOFAFermionD,MobiusEOFAFermionF,LinearOperatorEOFAD,LinearOperatorEOFAF> MxPCG_EOFA;
 
   // DJM: setup for EOFA ratio (Mobius)
   OneFlavourRationalParams OFRp;
@@ -143,9 +289,67 @@ int main(int argc, char **argv) {
   OFRp.degree   = 14;
   OFRp.precision= 50;
 
-  MobiusEOFAFermionR Strange_Op_L(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
-  MobiusEOFAFermionR Strange_Op_R(U, *FGrid, *FrbGrid, *GridPtr, *GridRBPtr, pv_mass, strange_mass, pv_mass, -1.0, 1, M5, b, c);
-  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> EOFA(Strange_Op_L, Strange_Op_R, ActionCG, OFRp, true);
+  
+  MobiusEOFAFermionR Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
+  MobiusEOFAFermionF Strange_Op_LF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
+  MobiusEOFAFermionR Strange_Op_R (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
+  MobiusEOFAFermionF Strange_Op_RF(UF, *FGridF, *FrbGridF, *GridPtrF, *GridRBPtrF, pv_mass, strange_mass,      pv_mass, -1.0, 1, M5, b, c);
+
+  ConjugateGradient<FermionField>      ActionCG(ActionStoppingCondition,MaxCGIterations);
+  ConjugateGradient<FermionField>  DerivativeCG(DerivativeStoppingCondition,MaxCGIterations);
+#ifdef MIXED_PRECISION
+  const int MX_inner = 1000;
+  // Mixed precision EOFA
+  LinearOperatorEOFAD Strange_LinOp_L (Strange_Op_L);
+  LinearOperatorEOFAD Strange_LinOp_R (Strange_Op_R);
+  LinearOperatorEOFAF Strange_LinOp_LF(Strange_Op_LF);
+  LinearOperatorEOFAF Strange_LinOp_RF(Strange_Op_RF);
+
+  MxPCG_EOFA ActionCGL(ActionStoppingCondition,
+		       MX_inner,
+		       MaxCGIterations,
+		       GridPtrF,
+		       FrbGridF,
+		       Strange_Op_LF,Strange_Op_L,
+		       Strange_LinOp_LF,Strange_LinOp_L);
+
+  MxPCG_EOFA DerivativeCGL(DerivativeStoppingCondition,
+			   MX_inner,
+			   MaxCGIterations,
+			   GridPtrF,
+			   FrbGridF,
+			   Strange_Op_LF,Strange_Op_L,
+			   Strange_LinOp_LF,Strange_LinOp_L);
+  
+  MxPCG_EOFA ActionCGR(ActionStoppingCondition,
+		       MX_inner,
+		       MaxCGIterations,
+		       GridPtrF,
+		       FrbGridF,
+		       Strange_Op_RF,Strange_Op_R,
+		       Strange_LinOp_RF,Strange_LinOp_R);
+  
+  MxPCG_EOFA DerivativeCGR(DerivativeStoppingCondition,
+			   MX_inner,
+			   MaxCGIterations,
+			   GridPtrF,
+			   FrbGridF,
+			   Strange_Op_RF,Strange_Op_R,
+			   Strange_LinOp_RF,Strange_LinOp_R);
+
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
+    EOFA(Strange_Op_L, Strange_Op_R, 
+	 ActionCG, 
+	 ActionCGL, ActionCGR,
+	 DerivativeCGL, DerivativeCGR,
+	 OFRp, true);
+#else
+  ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> 
+    EOFA(Strange_Op_L, Strange_Op_R, 
+	 ActionCG, ActionCG,
+	 DerivativeCG, DerivativeCG,
+	 OFRp, true);
+#endif
   Level1.push_back(&EOFA);
 
   ////////////////////////////////////
@@ -162,15 +366,62 @@ int main(int argc, char **argv) {
   }
   light_num.push_back(pv_mass);
 
+  //////////////////////////////////////////////////////////////
+  // Forced to replicate the MxPCG and DenominatorsF etc.. because
+  // there is no convenient way to "Clone" physics params from double op
+  // into single op for any operator pair.
+  // Same issue prevents using MxPCG in the Heatbath step
+  //////////////////////////////////////////////////////////////
   std::vector<FermionAction *> Numerators;
   std::vector<FermionAction *> Denominators;
   std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
+  std::vector<MxPCG *> ActionMPCG;
+  std::vector<MxPCG *> MPCG;
+  std::vector<FermionActionF *> DenominatorsF;
+  std::vector<LinearOperatorD *> LinOpD;
+  std::vector<LinearOperatorF *> LinOpF; 
 
   for(int h=0;h<n_hasenbusch+1;h++){
+
     std::cout << GridLogMessage << " 2f quotient Action  "<< light_num[h] << " / " << light_den[h]<< std::endl;
+
     Numerators.push_back  (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, Params));
     Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, Params));
+
+#ifdef MIXED_PRECISION
+    ////////////////////////////////////////////////////////////////////////////
+    // Mixed precision CG for 2f force
+    ////////////////////////////////////////////////////////////////////////////
+
+    DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsF));
+    LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
+    LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
+
+    MPCG.push_back(new MxPCG(DerivativeStoppingCondition,
+			     MX_inner,
+			     MaxCGIterations,
+			     GridPtrF,
+			     FrbGridF,
+			     *DenominatorsF[h],*Denominators[h],
+			     *LinOpF[h], *LinOpD[h]) );
+
+    ActionMPCG.push_back(new MxPCG(ActionStoppingCondition,
+				   MX_inner,
+				   MaxCGIterations,
+				   GridPtrF,
+				   FrbGridF,
+				   *DenominatorsF[h],*Denominators[h],
+				   *LinOpF[h], *LinOpD[h]) );
+
+    // Heatbath not mixed yet. As inverts numerators not so important as raised mass.
+    Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],ActionCG));
+#else
+    ////////////////////////////////////////////////////////////////////////////
+    // Standard CG for 2f force
+    ////////////////////////////////////////////////////////////////////////////
     Quotients.push_back   (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],DerivativeCG,ActionCG));
+#endif
+
   }
 
   for(int h=0;h<n_hasenbusch+1;h++){
diff --git a/HMC/README b/HMC/README
index 7bea2782..ad1af242 100644
--- a/HMC/README
+++ b/HMC/README
@@ -1,14 +1,54 @@
-* Sign off 2+1f HMC with Hasenbush and strange RHMC
-
-- Wilson plaquette cross checked against CPS and literature GwilsonFnone
-- Timesteps matched
-
-- Use 16^3x32 
-
 ********************************************************************
-* From previous CPS runs:
+TODO: 
 ********************************************************************
 
+i) Got mixed precision in 2f and EOFA force and action solves.
+   But need mixed precision in the heatbath solve. Best for Fermop to have a "clone" method, to
+   reduce the number of solver and action objects. Needed ideally for the EOFA heatbath.
+   15% perhaps
+   Combine with 2x trajectory length?
+
+ii) Rational on EOFA HB  -- relax order
+                         -- Test the approx as per David email
+
+Resume / roll.sh 
+
+----------------------------------------------------------------
+
+- 16^3 Currently 10 traj per hour
+
+- EOFA use a different derivative solver from action solver
+- EOFA fix Davids hack to the SchurRedBlack guessing
+
+*** Reduce precision/tolerance  in EOFA with second CG param.                          (10% speed up)
+*** Force gradient - reduced precision solve for the gradient                          (4/3x speedup)
+
+
+*** Need a plan for gauge field update for mixed precision in HMC                      (2x speed up)
+    -- Store the single prec action operator.
+    -- Clone the gauge field from the operator function argument.
+    -- Build the mixed precision operator dynamically from the passed operator and single prec clone.
+
+*** Mixed precision CG into EOFA portion         
+*** Further reduce precision in forces to 10^-6 ?
+
+*** Overall: a 3x or so is still possible => 500s -> 160s and 20 traj per hour on 16^3.
+
+- Use mixed precision CG in HMC                           
+- SchurRedBlack.h: stop use of operator function; use LinearOperator or similar instead.
+- Or make an OperatorFunction for mixed precision as a wrapper
+
+********************************************************************
+* Signed off 2+1f HMC with Hasenbush and strange RHMC 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
+* Signed off 2+1f HMC with Hasenbush and strange EOFA 16^3 x 32 DWF Ls=16 Plaquette 0.5883 ish
+* Wilson plaquette cross checked against CPS and literature GwilsonFnone
+********************************************************************
+
+********************************************************************
+* RHMC: Timesteps & eigenranges matched from previous CPS 16^3 x 32 runs:
+********************************************************************
+
+****
 Strange (m=0.04)  has eigenspan 
 **** 
 16^3 done as 1+1+1 with separate PV's. 
diff --git a/Hadrons/Application.cc b/Hadrons/Application.cc
index e6fb85fa..e4264f84 100644
--- a/Hadrons/Application.cc
+++ b/Hadrons/Application.cc
@@ -118,7 +118,22 @@ void Application::run(void)
     vm().setRunId(getPar().runId);
     vm().printContent();
     env().printContent();
-    schedule();
+    if (getPar().saveSchedule or getPar().scheduleFile.empty())
+    {
+        schedule();
+        if (getPar().saveSchedule)
+        {
+            std::string filename;
+
+            filename = (getPar().scheduleFile.empty()) ? 
+                         "hadrons.sched" : getPar().scheduleFile;
+            saveSchedule(filename);
+        }
+    }
+    else
+    {
+        loadSchedule(getPar().scheduleFile);
+    }
     printSchedule();
     if (!getPar().graphFile.empty())
     {
@@ -165,12 +180,13 @@ void Application::parseParameterFile(const std::string parameterFileName)
     pop(reader);
 }
 
-void Application::saveParameterFile(const std::string parameterFileName)
+void Application::saveParameterFile(const std::string parameterFileName, unsigned int prec)
 {
     LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
     if (env().getGrid()->IsBoss())
     {
         XmlWriter          writer(parameterFileName);
+        writer.setPrecision(prec);
         ObjectId           id;
         const unsigned int nMod = vm().getNModule();
 
diff --git a/Hadrons/Application.hpp b/Hadrons/Application.hpp
index 1fc8c146..36179c5f 100644
--- a/Hadrons/Application.hpp
+++ b/Hadrons/Application.hpp
@@ -57,6 +57,8 @@ public:
                                         VirtualMachine::GeneticPar, genetic,
                                         std::string,                runId,
                                         std::string,                graphFile,
+                                        std::string,                scheduleFile,
+                                        bool,                       saveSchedule,
                                         int,                        parallelWriteMaxRetry);
         GlobalPar(void): parallelWriteMaxRetry{-1} {}
     };
@@ -79,7 +81,7 @@ public:
     void run(void);
     // XML parameter file I/O
     void parseParameterFile(const std::string parameterFileName);
-    void saveParameterFile(const std::string parameterFileName);
+    void saveParameterFile(const std::string parameterFileName, unsigned int prec=15);
     // schedule computation
     void schedule(void);
     void saveSchedule(const std::string filename);
diff --git a/Hadrons/Modules.hpp b/Hadrons/Modules.hpp
index 774eafa4..4a62cf02 100644
--- a/Hadrons/Modules.hpp
+++ b/Hadrons/Modules.hpp
@@ -6,10 +6,13 @@
 #include <Hadrons/Modules/MContraction/Meson.hpp>
 #include <Hadrons/Modules/MContraction/DiscLoop.hpp>
 #include <Hadrons/Modules/MContraction/Gamma3pt.hpp>
+#include <Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp>
 #include <Hadrons/Modules/MContraction/WeakNonEye3pt.hpp>
 #include <Hadrons/Modules/MFermion/FreeProp.hpp>
 #include <Hadrons/Modules/MFermion/GaugeProp.hpp>
+#include <Hadrons/Modules/MFermion/EMLepton.hpp>
 #include <Hadrons/Modules/MSource/SeqGamma.hpp>
+#include <Hadrons/Modules/MSource/SeqAslash.hpp>
 #include <Hadrons/Modules/MSource/Point.hpp>
 #include <Hadrons/Modules/MSource/Wall.hpp>
 #include <Hadrons/Modules/MSource/Z2.hpp>
diff --git a/Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc
new file mode 100644
index 00000000..78f1a6ae
--- /dev/null
+++ b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc
@@ -0,0 +1,36 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Modules/MContraction/WeakMesonDecayKl2.cc
+
+Copyright (C) 2015-2018
+
+Author: Antonin Portelli <antonin.portelli@me.com>
+Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp>
+
+using namespace Grid;
+using namespace Hadrons;
+using namespace MContraction;
+
+template class Grid::Hadrons::MContraction::TWeakMesonDecayKl2<FIMPL>;
+
diff --git a/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp
new file mode 100644
index 00000000..fa97cda3
--- /dev/null
+++ b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp
@@ -0,0 +1,210 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp
+
+Copyright (C) 2015-2018
+
+Author: Antonin Portelli <antonin.portelli@me.com>
+Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
+
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+
+#ifndef Hadrons_MContraction_WeakMesonDecayKl2_hpp_
+#define Hadrons_MContraction_WeakMesonDecayKl2_hpp_
+
+#include <Hadrons/Global.hpp>
+#include <Hadrons/Module.hpp>
+#include <Hadrons/ModuleFactory.hpp>
+
+BEGIN_HADRONS_NAMESPACE
+
+/*
+* Kl2 contraction
+* -----------------------------
+*
+* contraction for Kl2 decay, including the lepton
+*
+* 	trace(q1*adj(q2)*g5*gL[mu]) * (gL[mu] * lepton)_{a,b}
+*
+* with open spinor indices (a,b) for the lepton part
+*
+*             q1                  lepton
+*        /------------\       /------------
+*       /              \     /
+*      /                \H_W/
+* g_5 *                  * * 
+*      \                /
+*       \              / 
+*        \____________/
+*             q2
+*
+* * options:
+* - q1: input propagator 1 (string)
+* - q2: input propagator 2 (string)
+* - lepton: input lepton (string)
+*/
+
+/******************************************************************************
+ *                                TWeakMesonDecayKl2                             *
+ ******************************************************************************/
+BEGIN_MODULE_NAMESPACE(MContraction)
+
+class WeakMesonDecayKl2Par: Serializable
+{
+public:
+    GRID_SERIALIZABLE_CLASS_MEMBERS(WeakMesonDecayKl2Par,
+                                    std::string, q1,
+                                    std::string, q2,
+                                    std::string, lepton,
+				    std::string, output);
+};
+
+template <typename FImpl>
+class TWeakMesonDecayKl2: public Module<WeakMesonDecayKl2Par>
+{
+public:
+    FERM_TYPE_ALIASES(FImpl,);
+    class Metadata: Serializable
+    {
+    public:
+        GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
+                                        int, spinidx1,
+                                        int, spinidx2);
+    };
+    typedef Correlator<Metadata> Result;
+public:
+    // constructor
+    TWeakMesonDecayKl2(const std::string name);
+    // destructor
+    virtual ~TWeakMesonDecayKl2(void) {};
+    // dependencies/products
+    virtual std::vector<std::string> getInput(void);
+    virtual std::vector<std::string> getOutput(void);
+protected:
+    // execution
+    virtual void setup(void);
+    // execution
+    virtual void execute(void);
+};
+
+MODULE_REGISTER_TMP(WeakMesonDecayKl2, TWeakMesonDecayKl2<FIMPL>, MContraction);
+
+/******************************************************************************
+ *                           TWeakMesonDecayKl2 implementation                   *
+ ******************************************************************************/
+// constructor /////////////////////////////////////////////////////////////////
+template <typename FImpl>
+TWeakMesonDecayKl2<FImpl>::TWeakMesonDecayKl2(const std::string name)
+: Module<WeakMesonDecayKl2Par>(name)
+{}
+
+// dependencies/products ///////////////////////////////////////////////////////
+template <typename FImpl>
+std::vector<std::string> TWeakMesonDecayKl2<FImpl>::getInput(void)
+{
+    std::vector<std::string> input = {par().q1, par().q2, par().lepton};
+    
+    return input;
+}
+
+template <typename FImpl>
+std::vector<std::string> TWeakMesonDecayKl2<FImpl>::getOutput(void)
+{
+    std::vector<std::string> output = {};
+    
+    return output;
+}
+
+// setup ////////////////////////////////////////////////////////////////////////
+template <typename FImpl>
+void TWeakMesonDecayKl2<FImpl>::setup(void)
+{
+    envTmpLat(LatticeComplex, "c");
+    envTmpLat(PropagatorField, "prop_buf");
+    envCreateLat(PropagatorField, getName());
+    envTmpLat(LatticeComplex, "buf");
+}
+
+// execution ///////////////////////////////////////////////////////////////////
+template <typename FImpl>
+void TWeakMesonDecayKl2<FImpl>::execute(void)
+{
+    LOG(Message) << "Computing QED Kl2 contractions '" << getName() << "' using"
+                 << " quarks '" << par().q1 << "' and '" << par().q2 << "' and"
+		 << "lepton '"  << par().lepton << "'" << std::endl;
+
+
+    auto &res = envGet(PropagatorField, getName()); res = zero;
+    Gamma                  g5(Gamma::Algebra::Gamma5);
+    int                    nt = env().getDim(Tp);
+
+    auto &q1 = envGet(PropagatorField, par().q1);
+    auto &q2 = envGet(PropagatorField, par().q2);
+    auto &lepton = envGet(PropagatorField, par().lepton);
+    envGetTmp(LatticeComplex, buf);
+    std::vector<TComplex>  res_summed;
+    envGetTmp(LatticeComplex, c);
+    envGetTmp(PropagatorField, prop_buf);  
+
+    std::vector<Result>    result;
+    Result r;
+
+    for (unsigned int mu = 0; mu < 4; ++mu)
+    {
+	c = zero;
+	//hadronic part: trace(q1*adj(q2)*g5*gL[mu]) 
+        c   = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu]));
+    	prop_buf = 1.;
+	//multiply lepton part
+	res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton;
+    }
+
+    //loop over spinor index of lepton part
+    unsigned int i = 0;
+    for (unsigned int s1 = 0; s1 < Ns ; ++s1)
+    for (unsigned int s2 = 0; s2 < Ns ; ++s2)
+    {
+	buf = peekColour(peekSpin(res,s1,s2),0,0);
+
+	sliceSum(buf, res_summed, Tp);
+
+	r.corr.clear();
+	for (unsigned int t = 0; t < nt; ++t)
+	{
+              r.corr.push_back(TensorRemove(res_summed[t]));
+	}
+
+	r.info.spinidx1 = s1;
+	r.info.spinidx2 = s2;
+	result.push_back(r);
+
+	i+=1;
+    }
+
+    saveResult(par().output, "weakdecay", result);
+}
+
+END_MODULE_NAMESPACE
+
+END_HADRONS_NAMESPACE
+
+#endif // Hadrons_MContraction_WeakMesonDecayKl2_hpp_
diff --git a/Hadrons/Modules/MFermion/EMLepton.cc b/Hadrons/Modules/MFermion/EMLepton.cc
new file mode 100644
index 00000000..891a6286
--- /dev/null
+++ b/Hadrons/Modules/MFermion/EMLepton.cc
@@ -0,0 +1,35 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Modules/MFermion/EMLepton.cc
+
+Copyright (C) 2015-2019
+
+Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Hadrons/Modules/MFermion/EMLepton.hpp>
+
+using namespace Grid;
+using namespace Hadrons;
+using namespace MFermion;
+
+template class Grid::Hadrons::MFermion::TEMLepton<FIMPL>;
+
diff --git a/Hadrons/Modules/MFermion/EMLepton.hpp b/Hadrons/Modules/MFermion/EMLepton.hpp
new file mode 100644
index 00000000..2d26416d
--- /dev/null
+++ b/Hadrons/Modules/MFermion/EMLepton.hpp
@@ -0,0 +1,292 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Modules/MFermion/EMLepton.hpp
+
+Copyright (C) 2015-2019
+
+Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+
+
+#ifndef Hadrons_MFermion_EMLepton_hpp_
+#define Hadrons_MFermion_EMLepton_hpp_
+
+#include <Hadrons/Global.hpp>
+#include <Hadrons/Module.hpp>
+#include <Hadrons/ModuleFactory.hpp>
+
+BEGIN_HADRONS_NAMESPACE
+
+/*******************************************************************************
+*
+* Calculates a free lepton propagator with a sequential insertion of 
+* i*\gamma_mu A_mu with a photon field A_mu 
+*
+*	L(x) = \sum_y S(x,y) i*\gamma_mu*A_mu S(y,xl) \delta_{(tl-x0),dt} 
+*
+* with a wall source for the lepton at tl
+*
+* In addition outputs the propagator without photon vertex
+* 
+*	L^{free}(x) =  S(x,xl) \delta_{(tl-x0),dt}
+*
+*
+* options:
+*  - action: fermion action used for propagator (string)
+*  - emField: photon field A_mu (string)
+*  - mass: input mass for the lepton propagator
+*  - twist: twisted boundary for lepton propagator, e.g. "0.0 0.0 0.0 0.5"
+*  - deltat: source-sink separation
+*
+*******************************************************************************/
+
+
+/******************************************************************************
+ *                         EMLepton                                           *
+ ******************************************************************************/
+BEGIN_MODULE_NAMESPACE(MFermion)
+
+class EMLeptonPar: Serializable
+{
+public:
+    GRID_SERIALIZABLE_CLASS_MEMBERS(EMLeptonPar,
+				    std::string,  action,
+				    std::string, emField,
+				    double, mass,
+                                    std::string , boundary,
+				    std::string,  twist,
+                                    unsigned int, deltat);
+};
+
+template <typename FImpl>
+class TEMLepton: public Module<EMLeptonPar>
+{
+public:
+    FERM_TYPE_ALIASES(FImpl,);
+public:
+    typedef PhotonR::GaugeField     EmField;
+public:
+    // constructor
+    TEMLepton(const std::string name);
+    // destructor
+    virtual ~TEMLepton(void) {};
+    // dependency relation
+    virtual std::vector<std::string> getInput(void);
+    virtual std::vector<std::string> getOutput(void);
+protected:
+    // setup
+    virtual void setup(void);
+    // execution
+    virtual void execute(void);
+private:
+    unsigned int Ls_;
+};
+
+MODULE_REGISTER_TMP(EMLepton, TEMLepton<FIMPL>, MFermion);
+
+/******************************************************************************
+ *                 TEMLepton implementation                             *
+ ******************************************************************************/
+// constructor /////////////////////////////////////////////////////////////////
+template <typename FImpl>
+TEMLepton<FImpl>::TEMLepton(const std::string name)
+: Module<EMLeptonPar>(name)
+{}
+
+// dependencies/products ///////////////////////////////////////////////////////
+template <typename FImpl>
+std::vector<std::string> TEMLepton<FImpl>::getInput(void)
+{
+    std::vector<std::string> in = {par().action, par().emField};
+    
+    return in;
+}
+
+template <typename FImpl>
+std::vector<std::string> TEMLepton<FImpl>::getOutput(void)
+{
+    std::vector<std::string> out = {getName(), getName() + "_free"};
+    
+    return out;
+}
+
+// setup ///////////////////////////////////////////////////////////////////////
+template <typename FImpl>
+void TEMLepton<FImpl>::setup(void)
+{
+    Ls_ = env().getObjectLs(par().action);
+    envCreateLat(PropagatorField, getName());
+    envCreateLat(PropagatorField, getName() + "_free");
+    envTmpLat(FermionField, "source", Ls_);
+    envTmpLat(FermionField, "sol", Ls_);
+    envTmpLat(FermionField, "tmp");
+    envTmpLat(PropagatorField, "sourcetmp");
+    envTmpLat(PropagatorField, "proptmp");
+    envTmpLat(PropagatorField, "freetmp");
+    envTmp(Lattice<iScalar<vInteger>>, "tlat",1, envGetGrid(LatticeComplex));
+
+}
+
+// execution ///////////////////////////////////////////////////////////////////
+template <typename FImpl>
+void TEMLepton<FImpl>::execute(void)
+{
+    LOG(Message) << "Computing free fermion propagator '" << getName() << "'"
+                 << std::endl;
+    
+    auto        &mat = envGet(FMat, par().action);
+    RealD mass = par().mass;
+    Complex ci(0.0,1.0);
+
+    PropagatorField &Aslashlep = envGet(PropagatorField, getName());
+    PropagatorField &lep = envGet(PropagatorField, getName() + "_free");
+    
+    envGetTmp(FermionField, source);
+    envGetTmp(FermionField, sol);
+    envGetTmp(FermionField, tmp);
+    LOG(Message) << "Calculating a lepton Propagator with sequential Aslash insertion with lepton mass " 
+                 << mass << " using the action '" << par().action
+                 << "' for fixed source-sink separation of " << par().deltat << std::endl;
+
+    envGetTmp(Lattice<iScalar<vInteger>>, tlat);
+    LatticeCoordinate(tlat, Tp);
+
+
+    std::vector<double> twist = strToVec<double>(par().twist);
+    if(twist.size() != Nd)
+    {
+	HADRONS_ERROR(Size, "number of twist angles does not match number of dimensions");
+    }
+    std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
+    if(boundary.size() != Nd)
+    {
+	HADRONS_ERROR(Size, "number of boundary conditions does not match number of dimensions");
+    }
+
+    auto &stoch_photon = envGet(EmField,  par().emField);
+    unsigned int nt = env().getDim(Tp);
+
+    envGetTmp(PropagatorField, proptmp);
+    envGetTmp(PropagatorField, freetmp);
+    envGetTmp(PropagatorField, sourcetmp);
+
+    std::vector<int> position;
+    SitePropagator   id;
+    id  = 1.;
+
+    unsigned int tl=0; 
+
+    //wallsource at tl
+    sourcetmp = 1.;
+    sourcetmp = where((tlat == tl), sourcetmp, 0.*sourcetmp);
+
+    //free propagator from pt source 
+    for (unsigned int s = 0; s < Ns; ++s)
+    {
+        LOG(Message) << "Calculation for spin= " << s << std::endl;
+	if (Ls_ == 1)
+	{
+	    PropToFerm<FImpl>(source, sourcetmp, s, 0);
+	}
+	else
+	{
+	    PropToFerm<FImpl>(tmp, sourcetmp, s, 0);
+	    // 5D source if action is 5d
+	    mat.ImportPhysicalFermionSource(tmp, source);
+	}
+        sol = zero;
+	mat.FreePropagator(source,sol,mass,boundary,twist);
+	if (Ls_ == 1)
+	{
+            FermToProp<FImpl>(freetmp, sol, s, 0);
+	}
+        // create 4D propagators from 5D one if necessary
+        if (Ls_ > 1)
+        {
+            mat.ExportPhysicalFermionSolution(sol, tmp);
+            FermToProp<FImpl>(freetmp, tmp, s, 0);
+        }
+    }
+
+    for(tl=0;tl<nt;tl++){
+
+	//shift free propagator to different source positions
+	//account for possible anti-periodic boundary in time
+	proptmp = Cshift(freetmp,Tp, -tl);
+	proptmp = where( tlat < tl, boundary[Tp]*proptmp, proptmp);
+
+        // free propagator for fixed source-sink separation 
+	lep = where(tlat == (tl-par().deltat+nt)%nt, proptmp, lep);
+
+        // i*A_mu*gamma_mu
+        sourcetmp = zero;
+        for(unsigned int mu=0;mu<=3;mu++)
+        {
+	    Gamma gmu(Gamma::gmu[mu]);
+	    sourcetmp +=  ci * PeekIndex<LorentzIndex>(stoch_photon, mu) *  (gmu * proptmp );
+        }
+
+        proptmp = zero;
+
+        //sequential propagator from i*Aslash*S
+        LOG(Message) << "Sequential propagator for t= " << tl << std::endl;
+        for (unsigned int s = 0; s < Ns; ++s)
+        {
+            LOG(Message) << "Calculation for spin= " << s << std::endl;
+	    if (Ls_ == 1)
+	    {
+		PropToFerm<FImpl>(source, sourcetmp, s, 0);
+	    }
+	    else
+	    {
+		PropToFerm<FImpl>(tmp, sourcetmp, s, 0);
+		// 5D source if action is 5d
+		mat.ImportPhysicalFermionSource(tmp, source);
+	    }
+            sol = zero;
+	    mat.FreePropagator(source,sol,mass,boundary,twist);
+	    if (Ls_ == 1)
+	    {
+        	FermToProp<FImpl>(proptmp, sol, s, 0);
+	    }
+            // create 4D propagators from 5D one if necessary
+            if (Ls_ > 1)
+            {
+                mat.ExportPhysicalFermionSolution(sol, tmp);
+                FermToProp<FImpl>(proptmp, tmp, s, 0);
+            }
+	}
+	// keep the result for the desired delta t
+	Aslashlep = where(tlat == (tl-par().deltat+nt)%nt, proptmp, Aslashlep);
+    }
+
+    //account for possible anti-periodic boundary in time
+    Aslashlep = where( tlat >= nt-par().deltat, boundary[Tp]*Aslashlep, Aslashlep);
+    lep = where( tlat >= nt-par().deltat, boundary[Tp]*lep, lep);
+
+}
+
+END_MODULE_NAMESPACE
+
+END_HADRONS_NAMESPACE
+
+#endif // Hadrons_MFermion_EMLepton_hpp_
diff --git a/Hadrons/Modules/MFermion/FreeProp.hpp b/Hadrons/Modules/MFermion/FreeProp.hpp
index 5cb168c3..baccba6a 100644
--- a/Hadrons/Modules/MFermion/FreeProp.hpp
+++ b/Hadrons/Modules/MFermion/FreeProp.hpp
@@ -49,6 +49,7 @@ public:
                                     std::string, source,
 				    std::string,  action,
 				    double, mass,
+                                    std::string , boundary,
 				    std::string,  twist);
 };
 
@@ -168,8 +169,16 @@ void TFreeProp<FImpl>::execute(void)
         }
         sol = zero;
 	std::vector<double> twist = strToVec<double>(par().twist);
-	if(twist.size() != Nd) HADRONS_ERROR(Size, "number of twist angles does not match number of dimensions");
-	mat.FreePropagator(source,sol,mass,twist);
+	if(twist.size() != Nd)
+	{
+	    HADRONS_ERROR(Size, "number of twist angles does not match number of dimensions");
+	}
+	std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
+	if(boundary.size() != Nd)
+	{
+	    HADRONS_ERROR(Size, "number of boundary conditions does not match number of dimensions");
+	}
+	mat.FreePropagator(source,sol,mass,boundary,twist);
         FermToProp<FImpl>(prop, sol, s, c);
         // create 4D propagators from 5D one if necessary
         if (Ls_ > 1)
diff --git a/Hadrons/Modules/MGauge/GaugeFix.hpp b/Hadrons/Modules/MGauge/GaugeFix.hpp
index 57b1e082..f5ebb39f 100644
--- a/Hadrons/Modules/MGauge/GaugeFix.hpp
+++ b/Hadrons/Modules/MGauge/GaugeFix.hpp
@@ -42,6 +42,8 @@ BEGIN_HADRONS_NAMESPACE
  ******************************************************************************/
 BEGIN_MODULE_NAMESPACE(MGauge)
 
+GRID_SERIALIZABLE_ENUM(Fix, undef, coulomb, Nd - 1, landau, -1);
+
 class GaugeFixPar: Serializable
 {
 public:
@@ -51,6 +53,7 @@ public:
                                     int, maxiter, 
                                     Real, Omega_tol, 
                                     Real, Phi_tol,
+                                    Fix,  gaugeFix,
                                     bool, Fourier);
 };
 
@@ -115,8 +118,8 @@ void TGaugeFix<GImpl>::execute(void)
 //Loads the gauge and fixes it
 {
     std::cout << "executing" << std::endl;
-    LOG(Message) << "Fixing the Gauge" << std::endl;
-    LOG(Message) << par().gauge << std::endl;
+    LOG(Message) << "Fixing the Gauge " << par().gauge << " using "
+                 << par().gaugeFix << " guage fixing. " << Nd - 1 << std::endl;
     auto &U     = envGet(GaugeField, par().gauge);
     auto &Umu   = envGet(GaugeField, getName());
     auto &xform = envGet(GaugeMat, getName()+"_xform");
@@ -126,9 +129,10 @@ void TGaugeFix<GImpl>::execute(void)
     int  maxiter   = par().maxiter;
     Real Omega_tol = par().Omega_tol;
     Real Phi_tol   = par().Phi_tol;
+    int  gaugeFix  = par().gaugeFix;
     bool Fourier   = par().Fourier;
     Umu = U;
-    FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier);
+    FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,gaugeFix);
     LOG(Message) << "Gauge Fixed" << std::endl;
 }
 
diff --git a/Hadrons/Modules/MIO/LoadEigenPack.cc b/Hadrons/Modules/MIO/LoadEigenPack.cc
index 15153871..0750d265 100644
--- a/Hadrons/Modules/MIO/LoadEigenPack.cc
+++ b/Hadrons/Modules/MIO/LoadEigenPack.cc
@@ -31,7 +31,7 @@ using namespace Grid;
 using namespace Hadrons;
 using namespace MIO;
 
-template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL>>;
+template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL>, GIMPL>;
 #ifdef GRID_DEFAULT_PRECISION_DOUBLE
-template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>;
+template class Grid::Hadrons::MIO::TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>, GIMPL>;
 #endif
diff --git a/Hadrons/Modules/MIO/LoadEigenPack.hpp b/Hadrons/Modules/MIO/LoadEigenPack.hpp
index 9751073a..0d6036b8 100644
--- a/Hadrons/Modules/MIO/LoadEigenPack.hpp
+++ b/Hadrons/Modules/MIO/LoadEigenPack.hpp
@@ -47,16 +47,21 @@ public:
                                     std::string, filestem,
                                     bool, multiFile,
                                     unsigned int, size,
-                                    unsigned int, Ls);
+                                    unsigned int, Ls,
+                                    std::string, gaugeXform);
 };
 
-template <typename Pack>
+template <typename Pack, typename GImpl>
 class TLoadEigenPack: public Module<LoadEigenPackPar>
 {
 public:
     typedef typename Pack::Field   Field;
     typedef typename Pack::FieldIo FieldIo;
     typedef BaseEigenPack<Field>   BasePack;
+
+public:
+    GAUGE_TYPE_ALIASES(GImpl, );
+    typedef typename GImpl::GaugeLinkField GaugeMat;
 public:
     // constructor
     TLoadEigenPack(const std::string name);
@@ -71,31 +76,36 @@ public:
     virtual void execute(void);
 };
 
-MODULE_REGISTER_TMP(LoadFermionEigenPack, TLoadEigenPack<FermionEigenPack<FIMPL>>, MIO);
+MODULE_REGISTER_TMP(LoadFermionEigenPack, ARG(TLoadEigenPack<FermionEigenPack<FIMPL>, GIMPL>), MIO);
 #ifdef GRID_DEFAULT_PRECISION_DOUBLE
-MODULE_REGISTER_TMP(LoadFermionEigenPackIo32, ARG(TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>>), MIO);
+MODULE_REGISTER_TMP(LoadFermionEigenPackIo32, ARG(TLoadEigenPack<FermionEigenPack<FIMPL, FIMPLF>, GIMPL>), MIO);
 #endif
 
 /******************************************************************************
  *                    TLoadEigenPack implementation                           *
  ******************************************************************************/
 // constructor /////////////////////////////////////////////////////////////////
-template <typename Pack>
-TLoadEigenPack<Pack>::TLoadEigenPack(const std::string name)
+template <typename Pack, typename GImpl>
+TLoadEigenPack<Pack, GImpl>::TLoadEigenPack(const std::string name)
 : Module<LoadEigenPackPar>(name)
 {}
 
 // dependencies/products ///////////////////////////////////////////////////////
-template <typename Pack>
-std::vector<std::string> TLoadEigenPack<Pack>::getInput(void)
+template <typename Pack, typename GImpl>
+std::vector<std::string> TLoadEigenPack<Pack, GImpl>::getInput(void)
 {
     std::vector<std::string> in;
+
+    if (!par().gaugeXform.empty())
+    {
+        in = {par().gaugeXform};
+    }
     
     return in;
 }
 
-template <typename Pack>
-std::vector<std::string> TLoadEigenPack<Pack>::getOutput(void)
+template <typename Pack, typename GImpl>
+std::vector<std::string> TLoadEigenPack<Pack, GImpl>::getOutput(void)
 {
     std::vector<std::string> out = {getName()};
     
@@ -103,8 +113,8 @@ std::vector<std::string> TLoadEigenPack<Pack>::getOutput(void)
 }
 
 // setup ///////////////////////////////////////////////////////////////////////
-template <typename Pack>
-void TLoadEigenPack<Pack>::setup(void)
+template <typename Pack, typename GImpl>
+void TLoadEigenPack<Pack, GImpl>::setup(void)
 {
     GridBase *gridIo = nullptr;
 
@@ -114,16 +124,64 @@ void TLoadEigenPack<Pack>::setup(void)
     }
     envCreateDerived(BasePack, Pack, getName(), par().Ls, par().size, 
                      envGetRbGrid(Field, par().Ls), gridIo);
+
+    if (!par().gaugeXform.empty())
+    {
+        if (par().Ls > 1)
+        {
+            LOG(Message) << "Setup 5d GaugeMat for Ls = " << par().Ls << std::endl;
+            envTmp(GaugeMat,    "tmpXform", par().Ls, envGetGrid5(Field, par().Ls));
+            envTmp(GaugeMat, "tmpXformOdd", par().Ls, envGetRbGrid5(Field, par().Ls));
+        }
+        else
+        {
+            LOG(Message) << "Setup 4d GaugeMat for Ls = " << par().Ls << std::endl;
+            envTmp(GaugeMat,    "tmpXform", par().Ls, envGetGrid(Field));
+            envTmp(GaugeMat, "tmpXformOdd", par().Ls, envGetRbGrid(Field));
+        }
+        
+    }
 }
 
 // execution ///////////////////////////////////////////////////////////////////
-template <typename Pack>
-void TLoadEigenPack<Pack>::execute(void)
+template <typename Pack, typename GImpl>
+void TLoadEigenPack<Pack, GImpl>::execute(void)
 {
     auto &epack = envGetDerived(BasePack, Pack, getName());
 
     epack.read(par().filestem, par().multiFile, vm().getTrajectory());
     epack.eval.resize(par().size);
+
+    if (!par().gaugeXform.empty())
+    {
+
+        LOG(Message) << "Applying gauge transformation to eigenvectors " << getName()
+                     << " using " << par().gaugeXform << std::endl;
+        auto &xform = envGet(GaugeMat, par().gaugeXform);
+        envGetTmp(GaugeMat,    tmpXform);
+        envGetTmp(GaugeMat, tmpXformOdd);
+
+        if (par().Ls > 1) 
+        {
+            LOG(Message) << "Creating 5d GaugeMat from " << par().gaugeXform << std::endl;
+            startTimer("5-d gauge transform creation");
+            for (unsigned int j = 0; j < par().Ls; j++)
+            {
+                InsertSlice(xform, tmpXform, j, 0);
+            }
+            stopTimer("5-d gauge transform creation");
+        }
+
+        pickCheckerboard(Odd, tmpXformOdd, tmpXform);
+        startTimer("Transform application");
+        for (unsigned int i = 0; i < par().size; i++)
+        {
+            LOG(Message) << "Applying gauge transformation to eigenvector i = " << i << "/" << par().size << std::endl;
+            epack.evec[i].checkerboard = Odd;
+            epack.evec[i] = tmpXformOdd * epack.evec[i];
+        }
+        stopTimer("Transform application");
+    }
 }
 
 END_MODULE_NAMESPACE
diff --git a/Hadrons/Modules/MSource/SeqAslash.cc b/Hadrons/Modules/MSource/SeqAslash.cc
new file mode 100644
index 00000000..6586f2f2
--- /dev/null
+++ b/Hadrons/Modules/MSource/SeqAslash.cc
@@ -0,0 +1,36 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Modules/MSource/SeqAslash.cc
+
+Copyright (C) 2015-2018
+
+Author: Antonin Portelli <antonin.portelli@me.com>
+Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#include <Hadrons/Modules/MSource/SeqAslash.hpp>
+
+using namespace Grid;
+using namespace Hadrons;
+using namespace MSource;
+
+template class Grid::Hadrons::MSource::TSeqAslash<FIMPL>;
+
diff --git a/Hadrons/Modules/MSource/SeqAslash.hpp b/Hadrons/Modules/MSource/SeqAslash.hpp
new file mode 100644
index 00000000..a31051fd
--- /dev/null
+++ b/Hadrons/Modules/MSource/SeqAslash.hpp
@@ -0,0 +1,186 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid 
+
+Source file: Hadrons/Modules/MSource/SeqAslash.hpp
+
+Copyright (C) 2015-2018
+
+Author: Antonin Portelli <antonin.portelli@me.com>
+Author: Lanny91 <andrew.lawson@gmail.com>
+Author: Vera Guelpers <Vera.Guelpers@ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+
+#ifndef Hadrons_MSource_SeqAslash_hpp_
+#define Hadrons_MSource_SeqAslash_hpp_
+
+#include <Hadrons/Global.hpp>
+#include <Hadrons/Module.hpp>
+#include <Hadrons/ModuleFactory.hpp>
+
+BEGIN_HADRONS_NAMESPACE
+
+/*
+ 
+ Sequential source
+ -----------------------------
+ * src_x = q_x * theta(x_3 - tA) * theta(tB - x_3) * i * A_mu g_mu * exp(i x.mom)
+ 
+ * options:
+ - q: input propagator (string)
+ - tA: begin timeslice (integer)
+ - tB: end timesilce (integer)
+ - emField: input photon field (string)
+ - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
+ 
+ */
+
+/******************************************************************************
+ *                         SeqAslash                             *
+ ******************************************************************************/
+BEGIN_MODULE_NAMESPACE(MSource)
+
+class SeqAslashPar: Serializable
+{
+public:
+    GRID_SERIALIZABLE_CLASS_MEMBERS(SeqAslashPar,
+                                    std::string,    q,
+                                    unsigned int,   tA,
+                                    unsigned int,   tB,
+                                    std::string,    emField,
+                                    std::string,    mom);
+};
+
+template <typename FImpl>
+class TSeqAslash: public Module<SeqAslashPar>
+{
+public:
+    FERM_TYPE_ALIASES(FImpl,);
+public:
+    typedef PhotonR::GaugeField     EmField;
+public:
+    // constructor
+    TSeqAslash(const std::string name);
+    // destructor
+    virtual ~TSeqAslash(void) {};
+    // dependency relation
+    virtual std::vector<std::string> getInput(void);
+    virtual std::vector<std::string> getOutput(void);
+protected:
+    // setup
+    virtual void setup(void);
+    // execution
+    virtual void execute(void);
+private:
+    bool        hasPhase_{false};
+    std::string momphName_, tName_;
+};
+
+MODULE_REGISTER_TMP(SeqAslash, TSeqAslash<FIMPL>, MSource);
+
+/******************************************************************************
+ *                         TSeqAslash implementation                           *
+ ******************************************************************************/
+// constructor /////////////////////////////////////////////////////////////////
+template <typename FImpl>
+TSeqAslash<FImpl>::TSeqAslash(const std::string name)
+: Module<SeqAslashPar>(name)
+, momphName_ (name + "_momph")
+, tName_ (name + "_t")
+{}
+
+// dependencies/products ///////////////////////////////////////////////////////
+template <typename FImpl>
+std::vector<std::string> TSeqAslash<FImpl>::getInput(void)
+{
+    std::vector<std::string> in = {par().q,par().emField};
+    
+    return in;
+}
+
+template <typename FImpl>
+std::vector<std::string> TSeqAslash<FImpl>::getOutput(void)
+{
+    std::vector<std::string> out = {getName()};
+    
+    return out;
+}
+
+// setup ///////////////////////////////////////////////////////////////////////
+template <typename FImpl>
+void TSeqAslash<FImpl>::setup(void)
+{
+    envCreateLat(PropagatorField, getName());
+    envCache(Lattice<iScalar<vInteger>>, tName_, 1, envGetGrid(LatticeComplex));
+    envCacheLat(LatticeComplex, momphName_);
+    envTmpLat(LatticeComplex, "coor");
+}
+
+// execution ///////////////////////////////////////////////////////////////////
+template <typename FImpl>
+void TSeqAslash<FImpl>::execute(void)
+{
+    if (par().tA == par().tB)
+    {
+        LOG(Message) << "Generating Aslash sequential source at t= " << par().tA 
+		     << " using the photon field " << par().emField << std::endl; 
+    }
+    else
+    {
+        LOG(Message) << "Generating Aslash sequential source for "
+                     << par().tA << " <= t <= " << par().tB 
+		     << " using the photon field " << par().emField << std::endl;
+    }
+    auto  &src = envGet(PropagatorField, getName()); src=zero;
+    auto  &q   = envGet(PropagatorField, par().q);
+    auto  &ph  = envGet(LatticeComplex, momphName_);
+    auto  &t   = envGet(Lattice<iScalar<vInteger>>, tName_);
+
+    if (!hasPhase_)
+    {
+        Complex           i(0.0,1.0);
+        std::vector<Real> p;
+
+        envGetTmp(LatticeComplex, coor);
+        p  = strToVec<Real>(par().mom);
+        ph = zero;
+        for(unsigned int mu = 0; mu < env().getNd(); mu++)
+        {
+            LatticeCoordinate(coor, mu);
+            ph = ph + (p[mu]/env().getDim(mu))*coor;
+        }
+        ph = exp((Real)(2*M_PI)*i*ph);
+        LatticeCoordinate(t, Tp);
+        hasPhase_ = true;
+    }
+    auto &stoch_photon = envGet(EmField,  par().emField);
+    Complex ci(0.0,1.0);
+    for(unsigned int mu=0;mu<=3;mu++)
+    {
+	Gamma gmu(Gamma::gmu[mu]);
+	src = src + where((t >= par().tA) and (t <= par().tB), ci * PeekIndex<LorentzIndex>(stoch_photon, mu) *ph*(gmu*q), 0.*q);
+    }
+}
+
+END_MODULE_NAMESPACE
+
+END_HADRONS_NAMESPACE
+
+#endif // Hadrons_MSource_SeqAslash_hpp_
diff --git a/Hadrons/Modules/MSource/SeqConserved.cc b/Hadrons/Modules/MSource/SeqConserved.cc
index 73dec85c..fbef504f 100644
--- a/Hadrons/Modules/MSource/SeqConserved.cc
+++ b/Hadrons/Modules/MSource/SeqConserved.cc
@@ -32,4 +32,4 @@ using namespace Hadrons;
 using namespace MSource;
 
 template class Grid::Hadrons::MSource::TSeqConserved<FIMPL>;
-
+template class Grid::Hadrons::MSource::TSeqConserved<ZFIMPL>;
diff --git a/Hadrons/Modules/MSource/SeqConserved.hpp b/Hadrons/Modules/MSource/SeqConserved.hpp
index 0bdd7f39..33a2c9fd 100644
--- a/Hadrons/Modules/MSource/SeqConserved.hpp
+++ b/Hadrons/Modules/MSource/SeqConserved.hpp
@@ -104,6 +104,7 @@ private:
 };
 
 MODULE_REGISTER_TMP(SeqConserved, TSeqConserved<FIMPL>, MSource);
+MODULE_REGISTER_TMP(ZSeqConserved, TSeqConserved<ZFIMPL>, MSource);
 
 
 /******************************************************************************
diff --git a/Hadrons/Modules/MSource/SeqGamma.cc b/Hadrons/Modules/MSource/SeqGamma.cc
index d901d5af..c2ae2322 100644
--- a/Hadrons/Modules/MSource/SeqGamma.cc
+++ b/Hadrons/Modules/MSource/SeqGamma.cc
@@ -32,4 +32,4 @@ using namespace Hadrons;
 using namespace MSource;
 
 template class Grid::Hadrons::MSource::TSeqGamma<FIMPL>;
-
+template class Grid::Hadrons::MSource::TSeqGamma<ZFIMPL>;
diff --git a/Hadrons/Modules/MSource/SeqGamma.hpp b/Hadrons/Modules/MSource/SeqGamma.hpp
index 1ab5f9b9..25f32a75 100644
--- a/Hadrons/Modules/MSource/SeqGamma.hpp
+++ b/Hadrons/Modules/MSource/SeqGamma.hpp
@@ -91,6 +91,7 @@ private:
 };
 
 MODULE_REGISTER_TMP(SeqGamma, TSeqGamma<FIMPL>, MSource);
+MODULE_REGISTER_TMP(ZSeqGamma, TSeqGamma<ZFIMPL>, MSource);
 
 /******************************************************************************
  *                         TSeqGamma implementation                           *
diff --git a/Hadrons/Modules/MSource/Wall.hpp b/Hadrons/Modules/MSource/Wall.hpp
index e43c61bb..945e2760 100644
--- a/Hadrons/Modules/MSource/Wall.hpp
+++ b/Hadrons/Modules/MSource/Wall.hpp
@@ -119,6 +119,9 @@ template <typename FImpl>
 void TWall<FImpl>::setup(void)
 {
     envCreateLat(PropagatorField, getName());
+    envCache(Lattice<iScalar<vInteger>>, tName_, 1, envGetGrid(LatticeComplex));
+    envCacheLat(LatticeComplex, momphName_);
+    envTmpLat(LatticeComplex, "coor");
 }
 
 // execution ///////////////////////////////////////////////////////////////////
diff --git a/Hadrons/Utilities/HadronsXmlRun.cc b/Hadrons/Utilities/HadronsXmlRun.cc
index a69de7d7..d13dbfe6 100644
--- a/Hadrons/Utilities/HadronsXmlRun.cc
+++ b/Hadrons/Utilities/HadronsXmlRun.cc
@@ -35,22 +35,15 @@ using namespace Hadrons;
 int main(int argc, char *argv[])
 {
     // parse command line
-    std::string parameterFileName, scheduleFileName = "";
+    std::string parameterFileName;
     
     if (argc < 2)
     {
-        std::cerr << "usage: " << argv[0] << " <parameter file> [<precomputed schedule>] [Grid options]";
+        std::cerr << "usage: " << argv[0] << " <parameter file> [Grid options]";
         std::cerr << std::endl;
         std::exit(EXIT_FAILURE);
     }
     parameterFileName = argv[1];
-    if (argc > 2)
-    {
-        if (argv[2][0] != '-')
-        {
-            scheduleFileName = argv[2];
-        }
-    }
     
     // initialization
     Grid_init(&argc, &argv);
@@ -61,10 +54,6 @@ int main(int argc, char *argv[])
         Application application(parameterFileName);
         
         application.parseParameterFile(parameterFileName);
-        if (!scheduleFileName.empty())
-        {
-            application.loadSchedule(scheduleFileName);
-        }
         application.run();
     }
     catch (const std::exception& e)
diff --git a/Hadrons/VirtualMachine.cc b/Hadrons/VirtualMachine.cc
index 0d2e7de2..8a44ba69 100644
--- a/Hadrons/VirtualMachine.cc
+++ b/Hadrons/VirtualMachine.cc
@@ -601,11 +601,10 @@ VirtualMachine::Program VirtualMachine::schedule(const GeneticPar &par)
     Scheduler scheduler(graph, memPeak, gpar);
     gen = 0;
     scheduler.initPopulation();
-    LOG(Iterative) << "Start: " << sizeString(scheduler.getMinValue()) 
-                   << std::endl;
+    LOG(Message) << "Start: " << sizeString(scheduler.getMinValue()) 
+                 << std::endl;
     do
     {
-        //LOG(Debug) << "Generation " << gen << ":" << std::endl;
         scheduler.nextGeneration();
         if (gen != 0)
         {
@@ -622,8 +621,8 @@ VirtualMachine::Program VirtualMachine::schedule(const GeneticPar &par)
         prevPeak = scheduler.getMinValue();
         if (gen % 10 == 0)
         {
-            LOG(Iterative) << "Generation " << gen << ": "
-                           << sizeString(scheduler.getMinValue()) << std::endl;
+            LOG(Message) << "Generation " << gen << ": "
+                         << sizeString(scheduler.getMinValue()) << std::endl;
         }
         
         gen++;
diff --git a/Hadrons/modules.inc b/Hadrons/modules.inc
index c13c18a2..5cb77b9e 100644
--- a/Hadrons/modules.inc
+++ b/Hadrons/modules.inc
@@ -1,6 +1,7 @@
 modules_cc =\
   Modules/MContraction/Baryon.cc \
   Modules/MContraction/Meson.cc \
+  Modules/MContraction/WeakMesonDecayKl2.cc \
   Modules/MContraction/WeakEye3pt.cc \
   Modules/MContraction/A2ALoop.cc \
   Modules/MContraction/WeakNonEye3pt.cc \
@@ -10,11 +11,13 @@ modules_cc =\
   Modules/MContraction/Gamma3pt.cc \
   Modules/MFermion/FreeProp.cc \
   Modules/MFermion/GaugeProp.cc \
+  Modules/MFermion/EMLepton.cc \
   Modules/MSource/Momentum.cc \
   Modules/MSource/Point.cc \
   Modules/MSource/Wall.cc \
   Modules/MSource/SeqConserved.cc \
   Modules/MSource/SeqGamma.cc \
+  Modules/MSource/SeqAslash.cc \
   Modules/MSource/Z2.cc \
   Modules/MSink/Point.cc \
   Modules/MSink/Smear.cc \
@@ -83,10 +86,13 @@ modules_hpp =\
   Modules/MContraction/Meson.hpp \
   Modules/MContraction/DiscLoop.hpp \
   Modules/MContraction/Gamma3pt.hpp \
+  Modules/MContraction/WeakMesonDecayKl2.hpp \
   Modules/MContraction/WeakNonEye3pt.hpp \
   Modules/MFermion/FreeProp.hpp \
   Modules/MFermion/GaugeProp.hpp \
+  Modules/MFermion/EMLepton.hpp \
   Modules/MSource/SeqGamma.hpp \
+  Modules/MSource/SeqAslash.hpp \
   Modules/MSource/Point.hpp \
   Modules/MSource/Wall.hpp \
   Modules/MSource/Z2.hpp \
diff --git a/VERSION b/VERSION
deleted file mode 100644
index a0211af1..00000000
--- a/VERSION
+++ /dev/null
@@ -1,5 +0,0 @@
-Version : 0.8.0
-
-- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
-- MPI and MPI3 comms optimisations for KNL and OPA finished
-- Half precision comms
diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc
index d1ec1309..522e0cb5 100644
--- a/tests/IO/Test_serialisation.cc
+++ b/tests/IO/Test_serialisation.cc
@@ -370,5 +370,18 @@ int main(int argc,char **argv)
   tensorConvTest(rng, SpinMatrix);
   tensorConvTest(rng, SpinVector);
 
+  {
+    HMCparameters HMCparams;
+    HMCparams.StartingType     =std::string("CheckpointStart");
+    HMCparams.StartTrajectory  =7;
+    HMCparams.Trajectories     =1000;
+    HMCparams.NoMetropolisUntil=0;
+    HMCparams.MD.name          =std::string("Force Gradient");
+    HMCparams.MD.MDsteps       = 10;
+    HMCparams.MD.trajL         = 1.0;
+
+    XmlWriter HMCwr("HMCparameters.xml");
+    write(HMCwr,"HMCparameters",HMCparams);
+  }
   Grid_finalize();
 }
diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc
index 7fdf5f73..4eb6887d 100644
--- a/tests/core/Test_fft_gfix.cc
+++ b/tests/core/Test_fft_gfix.cc
@@ -60,6 +60,9 @@ int main (int argc, char ** argv)
   std::cout<< "* Testing we can gauge fix steep descent a RGT of Unit gauge    *" <<std::endl;
   std::cout<< "*****************************************************************" <<std::endl;
 
+  //  int coulomb_dir = -1;
+  int coulomb_dir = Nd-1;
+
   LatticeGaugeField   Umu(&GRID);
   LatticeGaugeField   Urnd(&GRID);
   LatticeGaugeField   Uorg(&GRID);
@@ -68,6 +71,7 @@ int main (int argc, char ** argv)
 
   LatticeColourMatrix   xform1(&GRID); // Gauge xform
   LatticeColourMatrix   xform2(&GRID); // Gauge xform
+  LatticeColourMatrix   xform3(&GRID); // Gauge xform
   
   SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
   Uorg=Umu;
@@ -127,6 +131,22 @@ int main (int argc, char ** argv)
   plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
   std::cout << " Final plaquette "<<plaq << std::endl;
 
+  std::cout<< "*****************************************************************" <<std::endl;
+  std::cout<< "* Testing Fourier accelerated fixing to coulomb gauge           *" <<std::endl;
+  std::cout<< "*****************************************************************" <<std::endl;
+
+  Umu=Urnd;
+  SU3::HotConfiguration(pRNG,Umu); // Unit gauge
+
+  plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
+  std::cout << " Initial plaquette "<<plaq << std::endl;
+
+  FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,xform3,alpha,10000,1.0e-12, 1.0e-12,true,coulomb_dir);
+
+  std::cout << Umu<<std::endl;
+
+  plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
+  std::cout << " Final plaquette "<<plaq << std::endl;
 
   Grid_finalize();
 }