From 9cfc180334ab7aa16ce90405d7d11f51d164a94a Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Fri, 29 May 2015 14:11:34 +0100
Subject: [PATCH 01/20] Integer wrap problem fixed.

---
 benchmarks/Grid_comms.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/benchmarks/Grid_comms.cc b/benchmarks/Grid_comms.cc
index 59d709a0..c218070b 100644
--- a/benchmarks/Grid_comms.cc
+++ b/benchmarks/Grid_comms.cc
@@ -77,7 +77,8 @@ int main (int argc, char ** argv)
       }
       double stop=usecond();
 
-      double xbytes    = Nloop*bytes*2*ncomm;
+      double dbytes    = bytes;
+      double xbytes    = Nloop*dbytes*2.0*ncomm;
       double rbytes    = xbytes;
       double bidibytes = xbytes+rbytes;
 
@@ -151,8 +152,9 @@ int main (int argc, char ** argv)
       }
 
       double stop=usecond();
-
-      double xbytes    = Nloop*bytes*2*ncomm;
+      
+      double dbytes    = bytes;
+      double xbytes    = Nloop*dbytes*2.0*ncomm;
       double rbytes    = xbytes;
       double bidibytes = xbytes+rbytes;
 

From a75b6f6e78fab299b6c08f983ca92b4c4ebfcaf8 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Sun, 31 May 2015 15:09:02 +0100
Subject: [PATCH 02/20] Large scale change to support 5d fermion formulations.
 Have 5d replicated wilson with 4d gauge working and matrix regressing to Ls
 copies of wilson.

---
 benchmarks/Grid_comms.cc                      |   2 +-
 benchmarks/Grid_dwf.cc                        | 145 +++++++++++
 benchmarks/Grid_wilson.cc                     |   9 +-
 benchmarks/Grid_wilson_cg_prec.cc             |   6 +-
 benchmarks/Grid_wilson_cg_schur.cc            |   2 +-
 benchmarks/Grid_wilson_cg_unprec.cc           |   4 +-
 benchmarks/Grid_wilson_evenodd.cc             |   2 +-
 benchmarks/Makefile.am                        |   5 +-
 lib/Grid.h                                    |   2 +-
 lib/Grid_init.cc                              |   6 +-
 lib/Grid_stencil.h                            |  33 +--
 lib/Grid_threads.h                            |   2 +-
 lib/Makefile.am                               | 166 +++++++------
 lib/algorithms/SparseMatrix.h                 |   7 +-
 lib/algorithms/iterative/SchurRedBlack.h      |   4 +-
 lib/cartesian/Grid_cartesian_base.h           |  11 +-
 lib/cartesian/Grid_cartesian_full.h           |   7 +-
 lib/cartesian/Grid_cartesian_red_black.h      | 196 ++++++++++-----
 lib/cshift/Grid_cshift_common.h               |  14 +-
 lib/cshift/Grid_cshift_mpi.h                  |  16 +-
 lib/cshift/Grid_cshift_none.h                 |   2 +-
 lib/lattice/Grid_lattice_peekpoke.h           |   2 +-
 lib/lattice/Grid_lattice_transfer.h           |   3 +-
 lib/qcd/{Grid_qcd_dirac.cc => Dirac.cc}       |   0
 lib/qcd/{Grid_qcd_dirac.h => Dirac.h}         |   0
 lib/qcd/Grid_qcd_wilson_dop.cc                | 217 -----------------
 lib/qcd/Grid_qcd_wilson_dop.h                 | 105 --------
 lib/qcd/{Grid_qcd.h => QCD.h}                 |  17 +-
 lib/qcd/{Grid_qcd_2spinor.h => TwoSpinor.h}   |   0
 lib/qcd/action/Actions.h                      |  10 +
 lib/qcd/action/fermion/FermionAction.h        |  47 ++++
 .../action/fermion/FiveDimWilsonFermion.cc    | 228 ++++++++++++++++++
 lib/qcd/action/fermion/FiveDimWilsonFermion.h | 108 +++++++++
 lib/qcd/action/fermion/WilsonCompressor.h     |  61 +++++
 lib/qcd/action/fermion/WilsonFermion.cc       | 163 +++++++++++++
 lib/qcd/action/fermion/WilsonFermion.h        |  87 +++++++
 .../fermion/WilsonKernels.cc}                 |  46 ++--
 lib/qcd/action/fermion/WilsonKernels.h        |  42 ++++
 .../fermion/WilsonKernelsHand.cc}             |  57 ++---
 lib/stencil/Grid_lebesgue.cc                  | 103 ++++++++
 lib/stencil/Grid_lebesgue.h                   |  29 +++
 lib/stencil/Grid_stencil_common.cc            | 103 +-------
 scripts/linecount                             |   2 +-
 tests/Grid_cshift_red_black.cc                | 165 +++++++++++++
 tests/Makefile.am                             |   5 +-
 45 files changed, 1549 insertions(+), 692 deletions(-)
 create mode 100644 benchmarks/Grid_dwf.cc
 rename lib/qcd/{Grid_qcd_dirac.cc => Dirac.cc} (100%)
 rename lib/qcd/{Grid_qcd_dirac.h => Dirac.h} (100%)
 delete mode 100644 lib/qcd/Grid_qcd_wilson_dop.cc
 delete mode 100644 lib/qcd/Grid_qcd_wilson_dop.h
 rename lib/qcd/{Grid_qcd.h => QCD.h} (97%)
 rename lib/qcd/{Grid_qcd_2spinor.h => TwoSpinor.h} (100%)
 create mode 100644 lib/qcd/action/Actions.h
 create mode 100644 lib/qcd/action/fermion/FermionAction.h
 create mode 100644 lib/qcd/action/fermion/FiveDimWilsonFermion.cc
 create mode 100644 lib/qcd/action/fermion/FiveDimWilsonFermion.h
 create mode 100644 lib/qcd/action/fermion/WilsonCompressor.h
 create mode 100644 lib/qcd/action/fermion/WilsonFermion.cc
 create mode 100644 lib/qcd/action/fermion/WilsonFermion.h
 rename lib/qcd/{Grid_qcd_dhop.cc => action/fermion/WilsonKernels.cc} (87%)
 create mode 100644 lib/qcd/action/fermion/WilsonKernels.h
 rename lib/qcd/{Grid_qcd_dhop_hand.cc => action/fermion/WilsonKernelsHand.cc} (92%)
 create mode 100644 lib/stencil/Grid_lebesgue.cc
 create mode 100644 lib/stencil/Grid_lebesgue.h
 create mode 100644 tests/Grid_cshift_red_black.cc

diff --git a/benchmarks/Grid_comms.cc b/benchmarks/Grid_comms.cc
index c218070b..a24e7ed8 100644
--- a/benchmarks/Grid_comms.cc
+++ b/benchmarks/Grid_comms.cc
@@ -82,7 +82,7 @@ int main (int argc, char ** argv)
       double rbytes    = xbytes;
       double bidibytes = xbytes+rbytes;
 
-      double time = stop-start;
+      double time = stop-start; // microseconds
 
       std::cout << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
     }
diff --git a/benchmarks/Grid_dwf.cc b/benchmarks/Grid_dwf.cc
new file mode 100644
index 00000000..eb1d9299
--- /dev/null
+++ b/benchmarks/Grid_dwf.cc
@@ -0,0 +1,145 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>   // d: type of the single wrapped member
+struct scal {       // Minimal wrapper around one value of type d; nothing else in this file refers to it.
+  d internal;       // the wrapped value
+};
+
+  Gamma::GammaMatrix Gmu [] = {   // Gamma-matrix selector per direction index mu; main() uses it as Gamma(Gmu[mu]) for mu in [0,Nd).
+    Gamma::GammaX,                // mu = 0
+    Gamma::GammaY,                // mu = 1
+    Gamma::GammaZ,                // mu = 2
+    Gamma::GammaT                 // mu = 3
+  };
+
+int main (int argc, char ** argv)  // Times FiveDimWilsonFermion::Dhop and compares it (plain and dagger) with a reference built from Cshift.
+{
+  Grid_init(&argc,&argv);  // parses the command line (e.g. --dslash-opt, --lebesgue and the lattice layout options)
+
+  int threads = GridThread::GetThreads();
+  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  std::vector<int> latt4 = GridDefaultLatt();                      // 4d lattice extents
+  std::vector<int> simd4 = GridDefaultSimd(Nd,vComplexF::Nsimd()); // 4d SIMD layout
+  std::vector<int> mpi4  = GridDefaultMpi();                       // 4d processor layout
+  // The 5d vectors below index entries [0..3], so all three layouts must be four dimensional.
+  assert(latt4.size()==4 ); 
+  assert(simd4.size()==4 );
+  assert(mpi4.size() ==4 );
+
+  const int Ls=1;  // extent of the fifth dimension
+  std::vector<int> latt5({Ls,latt4[0],latt4[1],latt4[2],latt4[3]});  // fifth dimension is prepended as dimension 0
+  std::vector<int> simd5({1 ,simd4[0],simd4[1],simd4[2],simd4[3]});  // extent 1 in the fifth dimension
+  std::vector<int>  mpi5({1 , mpi4[0], mpi4[1], mpi4[2], mpi4[3]});  // extent 1 in the fifth dimension
+  std::vector<int>   cb5({0,1,1,1,1}); // Checkerboard 4d only: dimension 0 (Ls) is masked out of the parity sum
+  int                cbd=1;            // use dim-1 to reduce; presumably becomes the grid's _checker_dim -- TODO confirm against the constructor
+
+  // Four dim grid for gauge field U
+  GridCartesian               UGrid(latt4,simd4,mpi4); 
+  GridRedBlackCartesian     UrbGrid(&UGrid);
+
+  // Five dim grid for fermions F
+  GridCartesian               FGrid(latt5,simd5,mpi5); 
+  GridRedBlackCartesian     FrbGrid(latt5,simd5,mpi5,cb5,cbd); 
+
+  std::vector<int> seeds4({1,2,3,4});  // fixed seeds for the 4d RNG
+  std::vector<int> seeds5({5,6,7,8});  // fixed seeds for the 5d RNG
+
+  GridParallelRNG          RNG5(&FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  LatticeFermion src   (&FGrid); random(RNG5,src);  // random 5d source
+  LatticeFermion result(&FGrid); result=zero;       // output of Dw.Dhop
+  LatticeFermion    ref(&FGrid);    ref=zero;       // reference answer built from Cshift
+  LatticeFermion    tmp(&FGrid);
+  LatticeFermion    err(&FGrid);                    // ref - result
+
+  ColourMatrix cm = Complex(1.0,0.0);  // not used again in this function
+
+  GridParallelRNG          RNG4(&UGrid);  RNG4.SeedFixedIntegers(seeds4);
+  LatticeGaugeField Umu(&UGrid); random(RNG4,Umu);  // random 4d gauge field
+  LatticeGaugeField Umu5d(&FGrid); 
+
+  // replicate across fifth dimension: each 4d outer site ss is copied into Ls consecutive 5d slots
+  for(int ss=0;ss<Umu._grid->oSites();ss++){
+    for(int s=0;s<Ls;s++){
+      Umu5d._odata[Ls*ss+s] = Umu._odata[ss];  // NOTE(review): assumes the fifth dimension runs fastest in FGrid's outer-site order -- confirm
+    }
+  }
+
+  ////////////////////////////////////
+  // Naive wilson implementation
+  ////////////////////////////////////
+  std::vector<LatticeColourMatrix> U(4,&FGrid);  // one colour-matrix field per direction, on the 5d grid
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = peekIndex<LorentzIndex>(Umu5d,mu);
+  }
+
+  if (1)
+  {
+    ref = zero;
+    for(int mu=0;mu<Nd;mu++){
+      // Dimension 0 of the 5d grid is the Ls direction (see latt5), so direction mu is shifted as dimension mu+1.
+      tmp = U[mu]*Cshift(src,mu+1,1);
+      ref=ref + tmp + Gamma(Gmu[mu])*tmp;
+
+      tmp =adj(U[mu])*src;
+      tmp =Cshift(tmp,mu+1,-1);
+      ref=ref + tmp - Gamma(Gmu[mu])*tmp;
+    }
+    ref = -0.5*ref;  // overall factor applied to the reference before comparing with Dhop
+  }
+
+  RealD mass=0.1;
+  FiveDimWilsonFermion Dw(Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass);  // takes the 4d gauge field together with the 5d and 4d grids
+  
+  std::cout << "Calling Dw"<<std::endl;
+  int ncall=1000;  // number of timed Dhop applications
+  double t0=usecond();
+  for(int i=0;i<ncall;i++){
+    Dw.Dhop(src,result,0);  // third argument 0 matches DaggerNo
+  }
+  double t1=usecond();
+
+  // Total site count: Ls times the product of the 4d extents.
+  double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
+  double flops=1344*volume*ncall;  // 1344 is presumably the flop count per site per Dhop call -- TODO confirm
+  
+  std::cout << "Called Dw"<<std::endl;
+  std::cout << "norm result "<< norm2(result)<<std::endl;
+  std::cout << "norm ref    "<< norm2(ref)<<std::endl;
+  std::cout << "mflop/s =   "<< flops/(t1-t0)<<std::endl;  // t1-t0 is in microseconds, so flops per microsecond is Mflop/s
+  err = ref-result; 
+  std::cout << "norm diff   "<< norm2(err)<<std::endl;
+
+
+  if (1)
+  { // Naive wilson dag implementation: same hopping terms as above with the sign of each Gamma term swapped
+    ref = zero;
+    for(int mu=0;mu<Nd;mu++){
+
+      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
+      tmp = U[mu]*Cshift(src,mu+1,1);
+      for(int i=0;i<ref._odata.size();i++){  // NOTE(review): int i compared with _odata.size(); likely a signed/unsigned comparison -- confirm
+	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
+      }
+
+      tmp =adj(U[mu])*src;
+      tmp =Cshift(tmp,mu+1,-1);
+      for(int i=0;i<ref._odata.size();i++){  // accumulates per outer site, as in the loop above
+	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
+      }
+    }
+    ref = -0.5*ref;  // same overall factor as the non-dagger reference
+  }
+  Dw.Dhop(src,result,1);  // third argument 1 matches DaggerYes
+  std::cout << "Called DwDag"<<std::endl;
+  std::cout << "norm result "<< norm2(result)<<std::endl;
+  std::cout << "norm ref    "<< norm2(ref)<<std::endl;
+  err = ref-result; 
+  std::cout << "norm diff   "<< norm2(err)<<std::endl;
+
+  Grid_finalize();
+}
diff --git a/benchmarks/Grid_wilson.cc b/benchmarks/Grid_wilson.cc
index 3b0d04bc..c1050098 100644
--- a/benchmarks/Grid_wilson.cc
+++ b/benchmarks/Grid_wilson.cc
@@ -49,6 +49,7 @@ int main (int argc, char ** argv)
   }  
 
   // Only one non-zero (y)
+#if 0
   Umu=zero;
   Complex cone(1.0,0.0);
   for(int nn=0;nn<Nd;nn++){
@@ -59,6 +60,7 @@ int main (int argc, char ** argv)
     }
     pokeIndex<LorentzIndex>(Umu,U[nn],nn);
   }
+#endif
 
   for(int mu=0;mu<Nd;mu++){
     U[mu] = peekIndex<LorentzIndex>(Umu,mu);
@@ -80,9 +82,9 @@ int main (int argc, char ** argv)
       }
     }
   }
-
+  ref = -0.5*ref;
   RealD mass=0.1;
-  WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
   
   std::cout << "Calling Dw"<<std::endl;
   int ncall=10000;
@@ -91,7 +93,7 @@ int main (int argc, char ** argv)
     Dw.Dhop(src,result,0);
   }
   double t1=usecond();
-  double flops=1320*volume*ncall;
+  double flops=1344*volume*ncall;
   
   std::cout << "Called Dw"<<std::endl;
   std::cout << "norm result "<< norm2(result)<<std::endl;
@@ -129,6 +131,7 @@ int main (int argc, char ** argv)
       }
     }
   }
+  ref = -0.5*ref;
   Dw.Dhop(src,result,1);
   std::cout << "Called DwDag"<<std::endl;
   std::cout << "norm result "<< norm2(result)<<std::endl;
diff --git a/benchmarks/Grid_wilson_cg_prec.cc b/benchmarks/Grid_wilson_cg_prec.cc
index e86ec820..e376349c 100644
--- a/benchmarks/Grid_wilson_cg_prec.cc
+++ b/benchmarks/Grid_wilson_cg_prec.cc
@@ -42,9 +42,9 @@ int main (int argc, char ** argv)
   }
   
   RealD mass=0.5;
-  WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
 
-  //  HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
+  //  HermitianOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
   //  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   //  CG(HermOp,src,result);
   
@@ -53,7 +53,7 @@ int main (int argc, char ** argv)
   pickCheckerboard(Odd,src_o,src);
   result_o=zero;
 
-  HermitianCheckerBoardedOperator<WilsonMatrix,LatticeFermion> HermOpEO(Dw);
+  HermitianCheckerBoardedOperator<WilsonFermion,LatticeFermion> HermOpEO(Dw);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOpEO,src_o,result_o);
 
diff --git a/benchmarks/Grid_wilson_cg_schur.cc b/benchmarks/Grid_wilson_cg_schur.cc
index af630ae1..28db1d4b 100644
--- a/benchmarks/Grid_wilson_cg_schur.cc
+++ b/benchmarks/Grid_wilson_cg_schur.cc
@@ -37,7 +37,7 @@ int main (int argc, char ** argv)
   LatticeFermion resid(&Grid); 
 
   RealD mass=0.5;
-  WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
 
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   SchurRedBlackSolve<LatticeFermion> SchurSolver(CG);
diff --git a/benchmarks/Grid_wilson_cg_unprec.cc b/benchmarks/Grid_wilson_cg_unprec.cc
index 15302aab..905dfde5 100644
--- a/benchmarks/Grid_wilson_cg_unprec.cc
+++ b/benchmarks/Grid_wilson_cg_unprec.cc
@@ -47,9 +47,9 @@ int main (int argc, char ** argv)
   }
   
   RealD mass=0.5;
-  WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
 
-  HermitianOperator<WilsonMatrix,LatticeFermion> HermOp(Dw);
+  HermitianOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOp,src,result);
 
diff --git a/benchmarks/Grid_wilson_evenodd.cc b/benchmarks/Grid_wilson_evenodd.cc
index 4bc8c357..a073139d 100644
--- a/benchmarks/Grid_wilson_evenodd.cc
+++ b/benchmarks/Grid_wilson_evenodd.cc
@@ -60,7 +60,7 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
 
-  WilsonMatrix Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
 
   LatticeFermion src_e   (&RBGrid);
   LatticeFermion src_o   (&RBGrid);
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index 95ae5eca..b2649669 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -5,11 +5,14 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 #
 # Test code
 #
-bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec Grid_wilson_evenodd  Grid_wilson_cg_prec Grid_wilson_cg_schur
+bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec Grid_wilson_evenodd  Grid_wilson_cg_prec Grid_wilson_cg_schur Grid_dwf
 
 Grid_wilson_SOURCES = Grid_wilson.cc
 Grid_wilson_LDADD = -lGrid
 
+Grid_dwf_SOURCES = Grid_dwf.cc
+Grid_dwf_LDADD = -lGrid
+
 Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
 Grid_wilson_evenodd_LDADD = -lGrid
 
diff --git a/lib/Grid.h b/lib/Grid.h
index 1e513a46..7fa56892 100644
--- a/lib/Grid.h
+++ b/lib/Grid.h
@@ -60,7 +60,7 @@
 
 #include <Grid_algorithms.h>// subdir aggregate
 
-#include <qcd/Grid_qcd.h>
+#include <qcd/QCD.h>
 #include <parallelIO/GridNerscIO.h>
 
 namespace Grid {
diff --git a/lib/Grid_init.cc b/lib/Grid_init.cc
index f72393cb..580e602a 100644
--- a/lib/Grid_init.cc
+++ b/lib/Grid_init.cc
@@ -142,7 +142,11 @@ void Grid_init(int *argc,char ***argv)
     Grid_quiesce_nodes();
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
-    WilsonMatrix::HandOptDslash=1;
+    WilsonFermion::HandOptDslash=1;
+    FiveDimWilsonFermion::HandOptDslash=1;
+  }
+  if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
+    LebesgueOrder::UseLebesgueOrder=1;
   }
   GridParseLayout(*argv,*argc,
 		  Grid_default_latt,
diff --git a/lib/Grid_stencil.h b/lib/Grid_stencil.h
index fa22361b..50d22453 100644
--- a/lib/Grid_stencil.h
+++ b/lib/Grid_stencil.h
@@ -1,6 +1,8 @@
 #ifndef GRID_STENCIL_H
 #define GRID_STENCIL_H
 
+#include <stencil/Grid_lebesgue.h>   // subdir aggregate
+
 //////////////////////////////////////////////////////////////////////////////////////////
 // Must not lose sight that goal is to be able to construct really efficient
 // gather to a point stencil code. CSHIFT is not the best way, so need
@@ -38,29 +40,12 @@
 //////////////////////////////////////////////////////////////////////////////////////////
 
 namespace Grid {
-
-
+  
 
   class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
   public:
 
       typedef uint32_t StencilInteger;
-      
-
-
-      StencilInteger alignup(StencilInteger n){
-	n--;           // 1000 0011 --> 1000 0010
-	n |= n >> 1;   // 1000 0010 | 0100 0001 = 1100 0011
-	n |= n >> 2;   // 1100 0011 | 0011 0000 = 1111 0011
-	n |= n >> 4;   // 1111 0011 | 0000 1111 = 1111 1111
-	n |= n >> 8;   // ... (At this point all bits are 1, so further bitwise-or
-	n |= n >> 16;  //      operations produce no effect.)
-	n++;           // 1111 1111 --> 1 0000 0000
-	return n;
-      };
-      void LebesgueOrder(void);
-
-      std::vector<StencilInteger> _LebesgueReorder;
 
       int                               _checkerboard;
       int                               _npoints; // Move to template param?
@@ -131,8 +116,8 @@ namespace Grid {
 	  // Gather phase
 	  int sshift [2];
 	  if ( comm_dim ) {
-	    sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,0);
-	    sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
+	    sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
+	    sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
 	    if ( sshift[0] == sshift[1] ) {
 	      if (splice_dim) {
 		GatherStartCommsSimd(source,dimension,shift,0x3,u_comm_buf,u_comm_offset,compress);
@@ -179,8 +164,8 @@ namespace Grid {
 	  std::vector<cobj,alignedAllocator<cobj> > send_buf(buffer_size); // hmm...
 	  std::vector<cobj,alignedAllocator<cobj> > recv_buf(buffer_size);
 	  
-	  int cb= (cbmask==0x2)? 1 : 0;
-	  int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
+	  int cb= (cbmask==0x2)? Odd : Even;
+	  int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 	  
 	  for(int x=0;x<rd;x++){       
 	    
@@ -266,8 +251,8 @@ namespace Grid {
 	  // Work out what to send where
 	  ///////////////////////////////////////////
 
-	  int cb    = (cbmask==0x2)? 1 : 0;
-	  int sshift= _grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
+	  int cb    = (cbmask==0x2)? Odd : Even;
+	  int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 	  
 	  // loop over outer coord planes orthog to dim
 	  for(int x=0;x<rd;x++){       
diff --git a/lib/Grid_threads.h b/lib/Grid_threads.h
index 24581855..f453b860 100644
--- a/lib/Grid_threads.h
+++ b/lib/Grid_threads.h
@@ -9,7 +9,7 @@
 
 #ifdef GRID_OMP
 #include <omp.h>
-#define PARALLEL_FOR_LOOP _Pragma("omp parallel for")
+#define PARALLEL_FOR_LOOP _Pragma("omp parallel for ")
 #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 #else
 #define PARALLEL_FOR_LOOP 
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a5b89c0a..aa531df7 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -16,87 +16,103 @@ endif
 lib_LIBRARIES = libGrid.a
 libGrid_a_SOURCES =				\
 	Grid_init.cc				\
+	stencil/Grid_lebesgue.cc		\
 	stencil/Grid_stencil_common.cc		\
-	qcd/Grid_qcd_dirac.cc			\
-	qcd/Grid_qcd_dhop.cc			\
-	qcd/Grid_qcd_dhop_hand.cc		\
-	qcd/Grid_qcd_wilson_dop.cc		\
 	algorithms/approx/Zolotarev.cc		\
 	algorithms/approx/Remez.cc		\
+	qcd/action/fermion/FiveDimWilsonFermion.cc\
+	qcd/action/fermion/WilsonFermion.cc\
+	qcd/action/fermion/WilsonKernels.cc\
+	qcd/action/fermion/WilsonKernelsHand.cc\
+	qcd/Dirac.cc\
 	$(extra_sources)
 
 #
 # Include files
 #
-nobase_include_HEADERS = algorithms/approx/bigfloat.h		\
-	algorithms/approx/Chebyshev.h				\
-	algorithms/approx/Remez.h				\
-	algorithms/approx/Zolotarev.h				\
-	algorithms/iterative/ConjugateGradient.h		\
-	algorithms/iterative/NormalEquations.h			\
-	algorithms/iterative/SchurRedBlack.h			\
-	algorithms/LinearOperator.h				\
-	algorithms/SparseMatrix.h				\
-	cartesian/Grid_cartesian_base.h				\
-	cartesian/Grid_cartesian_full.h				\
-	cartesian/Grid_cartesian_red_black.h			\
-	communicator/Grid_communicator_base.h			\
-	cshift/Grid_cshift_common.h				\
-	cshift/Grid_cshift_mpi.h				\
-	cshift/Grid_cshift_none.h				\
-	Grid.h							\
-	Grid_algorithms.h					\
-	Grid_aligned_allocator.h				\
-	Grid_cartesian.h					\
-	Grid_communicator.h					\
-	Grid_comparison.h					\
-	Grid_cshift.h						\
-	Grid_extract.h						\
-	Grid_lattice.h						\
-	Grid_math.h						\
-	Grid_simd.h						\
-	Grid_stencil.h						\
-	Grid_threads.h						\
-	lattice/Grid_lattice_arith.h				\
-	lattice/Grid_lattice_base.h				\
-	lattice/Grid_lattice_comparison.h			\
-	lattice/Grid_lattice_conformable.h			\
-	lattice/Grid_lattice_coordinate.h			\
-	lattice/Grid_lattice_ET.h				\
-	lattice/Grid_lattice_local.h				\
-	lattice/Grid_lattice_overload.h				\
-	lattice/Grid_lattice_peekpoke.h				\
-	lattice/Grid_lattice_reality.h				\
-	lattice/Grid_lattice_reduction.h			\
-	lattice/Grid_lattice_rng.h				\
-	lattice/Grid_lattice_trace.h				\
-	lattice/Grid_lattice_transfer.h				\
-	lattice/Grid_lattice_transpose.h			\
-	lattice/Grid_lattice_where.h				\
-	math/Grid_math_arith.h					\
-	math/Grid_math_arith_add.h				\
-	math/Grid_math_arith_mac.h				\
-	math/Grid_math_arith_mul.h				\
-	math/Grid_math_arith_scalar.h				\
-	math/Grid_math_arith_sub.h				\
-	math/Grid_math_inner.h					\
-	math/Grid_math_outer.h					\
-	math/Grid_math_peek.h					\
-	math/Grid_math_poke.h					\
-	math/Grid_math_reality.h				\
-	math/Grid_math_tensors.h				\
-	math/Grid_math_trace.h					\
-	math/Grid_math_traits.h					\
-	math/Grid_math_transpose.h				\
-	parallelIO/GridNerscIO.h				\
-	qcd/Grid_qcd.h						\
-	qcd/Grid_qcd_2spinor.h					\
-	qcd/Grid_qcd_dirac.h					\
-	qcd/Grid_qcd_wilson_dop.h				\
-	simd/Grid_vector_types.h				\
-	simd/Grid_sse4.h					\
-	simd/Grid_avx.h						\
-	simd/Grid_avx512.h					
-
-
+nobase_include_HEADERS=\
+		./algorithms/approx/bigfloat.h\
+		./algorithms/approx/bigfloat_double.h\
+		./algorithms/approx/Chebyshev.h\
+		./algorithms/approx/Remez.h\
+		./algorithms/approx/Zolotarev.h\
+		./algorithms/iterative/ConjugateGradient.h\
+		./algorithms/iterative/NormalEquations.h\
+		./algorithms/iterative/SchurRedBlack.h\
+		./algorithms/LinearOperator.h\
+		./algorithms/SparseMatrix.h\
+		./cartesian/Grid_cartesian_base.h\
+		./cartesian/Grid_cartesian_full.h\
+		./cartesian/Grid_cartesian_red_black.h\
+		./communicator/Grid_communicator_base.h\
+		./cshift/Grid_cshift_common.h\
+		./cshift/Grid_cshift_mpi.h\
+		./cshift/Grid_cshift_none.h\
+		./Grid.h\
+		./Grid_algorithms.h\
+		./Grid_aligned_allocator.h\
+		./Grid_cartesian.h\
+		./Grid_communicator.h\
+		./Grid_comparison.h\
+		./Grid_config.h\
+		./Grid_cshift.h\
+		./Grid_extract.h\
+		./Grid_lattice.h\
+		./Grid_math.h\
+		./Grid_simd.h\
+		./Grid_stencil.h\
+		./Grid_threads.h\
+		./lattice/Grid_lattice_arith.h\
+		./lattice/Grid_lattice_base.h\
+		./lattice/Grid_lattice_comparison.h\
+		./lattice/Grid_lattice_conformable.h\
+		./lattice/Grid_lattice_coordinate.h\
+		./lattice/Grid_lattice_ET.h\
+		./lattice/Grid_lattice_local.h\
+		./lattice/Grid_lattice_overload.h\
+		./lattice/Grid_lattice_peekpoke.h\
+		./lattice/Grid_lattice_reality.h\
+		./lattice/Grid_lattice_reduction.h\
+		./lattice/Grid_lattice_rng.h\
+		./lattice/Grid_lattice_trace.h\
+		./lattice/Grid_lattice_transfer.h\
+		./lattice/Grid_lattice_transpose.h\
+		./lattice/Grid_lattice_where.h\
+		./math/Grid_math_arith.h\
+		./math/Grid_math_arith_add.h\
+		./math/Grid_math_arith_mac.h\
+		./math/Grid_math_arith_mul.h\
+		./math/Grid_math_arith_scalar.h\
+		./math/Grid_math_arith_sub.h\
+		./math/Grid_math_inner.h\
+		./math/Grid_math_outer.h\
+		./math/Grid_math_peek.h\
+		./math/Grid_math_poke.h\
+		./math/Grid_math_reality.h\
+		./math/Grid_math_tensors.h\
+		./math/Grid_math_trace.h\
+		./math/Grid_math_traits.h\
+		./math/Grid_math_transpose.h\
+		./parallelIO/GridNerscIO.h\
+		./qcd/action/Actions.h\
+		./qcd/action/fermion/FermionAction.h\
+		./qcd/action/fermion/FiveDimWilsonFermion.h\
+		./qcd/action/fermion/WilsonCompressor.h\
+		./qcd/action/fermion/WilsonFermion.h\
+		./qcd/action/fermion/WilsonKernels.h\
+		./qcd/Dirac.h\
+		./qcd/QCD.h\
+		./qcd/TwoSpinor.h\
+		./qcd/FermionAction.h\
+		./simd/Grid_avx.h\
+		./simd/Grid_avx512.h\
+		./simd/Grid_qpx.h\
+		./simd/Grid_sse4.h\
+		./simd/Grid_vector_types.h\
+		./simd/Old/Grid_vComplexD.h\
+		./simd/Old/Grid_vComplexF.h\
+		./simd/Old/Grid_vInteger.h\
+		./simd/Old/Grid_vRealD.h\
+		./simd/Old/Grid_vRealF.h\
+		./stencil/Grid_lebesgue.h
 
diff --git a/lib/algorithms/SparseMatrix.h b/lib/algorithms/SparseMatrix.h
index 7bfe959b..9c955e9a 100644
--- a/lib/algorithms/SparseMatrix.h
+++ b/lib/algorithms/SparseMatrix.h
@@ -10,7 +10,7 @@ namespace Grid {
   /////////////////////////////////////////////////////////////////////////////////////////////
     template<class Field> class SparseMatrixBase {
     public:
-      GridBase *_grid;
+      virtual GridBase *Grid(void) =0;
       // Full checkerboar operations
       virtual RealD M    (const Field &in, Field &out)=0;
       virtual RealD Mdag (const Field &in, Field &out)=0;
@@ -19,7 +19,6 @@ namespace Grid {
 	ni=M(in,tmp);
 	no=Mdag(tmp,out);
       }
-      SparseMatrixBase(GridBase *grid) : _grid(grid) {};
     };
 
   /////////////////////////////////////////////////////////////////////////////////////////////
@@ -27,7 +26,7 @@ namespace Grid {
   /////////////////////////////////////////////////////////////////////////////////////////////
     template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
     public:
-      GridBase *_cbgrid;
+      virtual GridBase *RedBlackGrid(void)=0;
       // half checkerboard operaions
       virtual  void Meooe    (const Field &in, Field &out)=0;
       virtual  void Mooee    (const Field &in, Field &out)=0;
@@ -62,9 +61,7 @@ namespace Grid {
 	Field tmp(in._grid);
 	ni=Mpc(in,tmp);
 	no=MpcDag(tmp,out);
-	//	std::cout<<"MpcDagMpc "<<ni<<" "<<no<<std::endl;
       }
-      CheckerBoardedSparseMatrixBase(GridBase *grid,GridBase *cbgrid) : SparseMatrixBase<Field>(grid), _cbgrid(cbgrid) {};
     };
 
 }
diff --git a/lib/algorithms/iterative/SchurRedBlack.h b/lib/algorithms/iterative/SchurRedBlack.h
index d3a76d7f..109f554b 100644
--- a/lib/algorithms/iterative/SchurRedBlack.h
+++ b/lib/algorithms/iterative/SchurRedBlack.h
@@ -60,8 +60,8 @@ namespace Grid {
 
       // FIXME CGdiagonalMee not implemented virtual function
       // FIXME use CBfactorise to control schur decomp
-      GridBase *grid = _Matrix._cbgrid;
-      GridBase *fgrid= _Matrix._grid;
+      GridBase *grid = _Matrix.RedBlackGrid();
+      GridBase *fgrid= _Matrix.Grid();
  
       Field src_e(grid);
       Field src_o(grid);
diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Grid_cartesian_base.h
index a74773f8..e93125c1 100644
--- a/lib/cartesian/Grid_cartesian_base.h
+++ b/lib/cartesian/Grid_cartesian_base.h
@@ -52,16 +52,13 @@ public:
     ////////////////////////////////////////////////////////////////
     virtual int CheckerBoarded(int dim)=0;
     virtual int CheckerBoard(std::vector<int> site)=0;
-    virtual int CheckerBoardDestination(int source_cb,int shift)=0;
+    virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
     virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
-    inline int  CheckerBoardFromOindex (int Oindex){
+    virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
+    int  CheckerBoardFromOindex (int Oindex){
       std::vector<int> ocoor;
       oCoorFromOindex(ocoor,Oindex); 
-      int ss=0;
-      for(int d=0;d<_ndimension;d++){
-	ss=ss+ocoor[d];
-      }      
-      return ss&0x1;
+      return CheckerBoard(ocoor);
     }
 
     //////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/lib/cartesian/Grid_cartesian_full.h b/lib/cartesian/Grid_cartesian_full.h
index 73bd08b3..330bbfaf 100644
--- a/lib/cartesian/Grid_cartesian_full.h
+++ b/lib/cartesian/Grid_cartesian_full.h
@@ -18,11 +18,14 @@ public:
     virtual int CheckerBoard(std::vector<int> site){
         return 0;
     }
-    virtual int CheckerBoardDestination(int cb,int shift){
+    virtual int CheckerBoardDestination(int cb,int shift,int dim){
         return 0;
     }
+    virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){
+      return shift;
+    }
     virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
-        return shift;
+      return shift;
     }
     GridCartesian(std::vector<int> &dimensions,
 		  std::vector<int> &simd_layout,
diff --git a/lib/cartesian/Grid_cartesian_red_black.h b/lib/cartesian/Grid_cartesian_red_black.h
index ff2d3ba8..ace36edb 100644
--- a/lib/cartesian/Grid_cartesian_red_black.h
+++ b/lib/cartesian/Grid_cartesian_red_black.h
@@ -8,6 +8,10 @@ namespace Grid {
     static const int CbBlack=1;
     static const int Even   =CbRed;
     static const int Odd    =CbBlack;
+
+    // Perhaps these are misplaced and 
+    // should be in sparse matrix.
+    // Also should make these a named enum type
     static const int DaggerNo=0;
     static const int DaggerYes=1;
 
@@ -15,116 +19,174 @@ namespace Grid {
 class GridRedBlackCartesian : public GridBase
 {
 public:
+    std::vector<int> _checker_dim_mask;
+    int              _checker_dim;
+
     virtual int CheckerBoarded(int dim){
-      if( dim==0) return 1;
+      if( dim==_checker_dim) return 1;
       else return 0;
     }
     virtual int CheckerBoard(std::vector<int> site){
-      return (site[0]+site[1]+site[2]+site[3])&0x1;
+      int linear=0;
+      assert(site.size()==_ndimension);
+      for(int d=0;d<_ndimension;d++){ 
+	if(_checker_dim_mask[d])
+	  linear=linear+site[d];
+      }
+      return (linear&0x1);
     }
 
+
     // Depending on the cb of site, we toggle source cb.
     // for block #b, element #e = (b, e)
     // we need 
-    virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
+    virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int ocb){
+      if(dim != _checker_dim) return shift;
 
-      if(dim != 0) return shift;
-
-      int fulldim =_fdimensions[0];
+      int fulldim =_fdimensions[dim];
       shift = (shift+fulldim)%fulldim;
 
       // Probably faster with table lookup;
       // or by looping over x,y,z and multiply rather than computing checkerboard.
-      int ocb=CheckerBoardFromOindex(osite);
 	  
       if ( (source_cb+ocb)&1 ) {
+
 	return (shift)/2;
       } else {
 	return (shift+1)/2;
       }
     }
+    virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
 
-    virtual int CheckerBoardDestination(int source_cb,int shift){
-        if ((shift+_fdimensions[0])&0x1) {
+      if(dim != _checker_dim) return shift;
+
+      int ocb=CheckerBoardFromOindex(osite);
+      
+      return CheckerBoardShiftForCB(source_cb,dim,shift,ocb);
+    }
+    
+    virtual int CheckerBoardDestination(int source_cb,int shift,int dim){
+      if ( _checker_dim_mask[dim]  ) {
+	// If _fdimensions[checker_dim] is odd, then shifting by 1 in other dims
+	// does NOT cause a parity hop.
+	int add=(dim==_checker_dim) ? 0 : _fdimensions[_checker_dim];
+        if ( (shift+add) &0x1) {
             return 1-source_cb;
         } else {
             return source_cb;
         }
+      } else {
+	return source_cb;
+
+      }
     };
 
     GridRedBlackCartesian(GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors)  {};
 
     GridRedBlackCartesian(std::vector<int> &dimensions,
 			  std::vector<int> &simd_layout,
-			  std::vector<int> &processor_grid ) : GridBase(processor_grid)
+			  std::vector<int> &processor_grid,
+			  std::vector<int> &checker_dim_mask,
+			  int checker_dim
+			  ) :  GridBase(processor_grid) 
+    {
+      Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
+    }
+    GridRedBlackCartesian(std::vector<int> &dimensions,
+			  std::vector<int> &simd_layout,
+			  std::vector<int> &processor_grid) : GridBase(processor_grid) 
+    {
+      std::vector<int> checker_dim_mask(dimensions.size(),1);
+      Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0);
+    }
+    void Init(std::vector<int> &dimensions,
+	      std::vector<int> &simd_layout,
+	      std::vector<int> &processor_grid,
+	      std::vector<int> &checker_dim_mask,
+	      int checker_dim)
     {
     ///////////////////////
     // Grid information
     ///////////////////////
-        _ndimension = dimensions.size();
-        
-        _fdimensions.resize(_ndimension);
-        _gdimensions.resize(_ndimension);
-        _ldimensions.resize(_ndimension);
-        _rdimensions.resize(_ndimension);
-        _simd_layout.resize(_ndimension);
-        
-        _ostride.resize(_ndimension);
-        _istride.resize(_ndimension);
-        
-        _fsites = _gsites = _osites = _isites = 1;
+      _checker_dim = checker_dim;
+      assert(checker_dim_mask[checker_dim]==1);
+      _ndimension = dimensions.size();
+      assert(checker_dim_mask.size()==_ndimension);
+      assert(processor_grid.size()==_ndimension);
+      assert(simd_layout.size()==_ndimension);
+      
+      _fdimensions.resize(_ndimension);
+      _gdimensions.resize(_ndimension);
+      _ldimensions.resize(_ndimension);
+      _rdimensions.resize(_ndimension);
+      _simd_layout.resize(_ndimension);
+      
+      _ostride.resize(_ndimension);
+      _istride.resize(_ndimension);
+      
+      _fsites = _gsites = _osites = _isites = 1;
+	
+      _checker_dim_mask=checker_dim_mask;
 
-        for(int d=0;d<_ndimension;d++){
-            _fdimensions[d] = dimensions[d];
-            _gdimensions[d] = _fdimensions[d];
-	    _fsites = _fsites * _fdimensions[d];
-	    _gsites = _gsites * _gdimensions[d];
-                
-            if (d==0) _gdimensions[0] = _gdimensions[0]/2; // Remove a checkerboard
-            _ldimensions[d] = _gdimensions[d]/_processors[d];
-
-            // Use a reduced simd grid
-            _simd_layout[d] = simd_layout[d];
-            _rdimensions[d]= _ldimensions[d]/_simd_layout[d];
-
-            _osites *= _rdimensions[d];
-            _isites *= _simd_layout[d];
-                
-            // Addressing support
-            if ( d==0 ) {
-                _ostride[d] = 1;
-                _istride[d] = 1;
-            } else {
-                _ostride[d] = _ostride[d-1]*_rdimensions[d-1];
-                _istride[d] = _istride[d-1]*_simd_layout[d-1];
-            }
-        }
-            
-        ////////////////////////////////////////////////////////////////////////////////////////////
-        // subplane information
-        ////////////////////////////////////////////////////////////////////////////////////////////
-        _slice_block.resize(_ndimension);
-        _slice_stride.resize(_ndimension);
-        _slice_nblock.resize(_ndimension);
+      for(int d=0;d<_ndimension;d++){
+	_fdimensions[d] = dimensions[d];
+	_gdimensions[d] = _fdimensions[d];
+	_fsites = _fsites * _fdimensions[d];
+	_gsites = _gsites * _gdimensions[d];
         
-        int block =1;
-        int nblock=1;
-        for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d];
-            
-        for(int d=0;d<_ndimension;d++){
-            nblock/=_rdimensions[d];
-            _slice_block[d] =block;
-            _slice_stride[d]=_ostride[d]*_rdimensions[d];
-            _slice_nblock[d]=nblock;
-            block = block*_rdimensions[d];
-        }
+	if (d==_checker_dim) {
+	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard
+	}
+	_ldimensions[d] = _gdimensions[d]/_processors[d];
+
+	// Use a reduced simd grid
+	_simd_layout[d] = simd_layout[d];
+	_rdimensions[d]= _ldimensions[d]/_simd_layout[d];
+	
+	_osites *= _rdimensions[d];
+	_isites *= _simd_layout[d];
+        
+	// Addressing support
+	if ( d==0 ) {
+	  _ostride[d] = 1;
+	  _istride[d] = 1;
+	} else {
+	  _ostride[d] = _ostride[d-1]*_rdimensions[d-1];
+	  _istride[d] = _istride[d-1]*_simd_layout[d-1];
+	}
+      }
             
+      ////////////////////////////////////////////////////////////////////////////////////////////
+      // subplane information
+      ////////////////////////////////////////////////////////////////////////////////////////////
+      _slice_block.resize(_ndimension);
+      _slice_stride.resize(_ndimension);
+      _slice_nblock.resize(_ndimension);
+        
+      int block =1;
+      int nblock=1;
+      for(int d=0;d<_ndimension;d++) nblock*=_rdimensions[d];
+      
+      for(int d=0;d<_ndimension;d++){
+	nblock/=_rdimensions[d];
+	_slice_block[d] =block;
+	_slice_stride[d]=_ostride[d]*_rdimensions[d];
+	_slice_nblock[d]=nblock;
+	block = block*_rdimensions[d];
+      }
+      
     };
 protected:
     virtual int oIndex(std::vector<int> &coor)
     {
-        int idx=_ostride[0]*((coor[0]/2)%_rdimensions[0]);
-        for(int d=1;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
+      int idx=0;
+      for(int d=0;d<_ndimension;d++) {
+	if( d==_checker_dim ) {
+	  idx+=_ostride[d]*((coor[d]/2)%_rdimensions[d]);
+	} else {
+	  idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
+	}
+      }
         return idx;
     };
         
diff --git a/lib/cshift/Grid_cshift_common.h b/lib/cshift/Grid_cshift_common.h
index c369fe1c..06e812d9 100644
--- a/lib/cshift/Grid_cshift_common.h
+++ b/lib/cshift/Grid_cshift_common.h
@@ -175,7 +175,6 @@ PARALLEL_NESTED_LOOP2
       if ( ocb&cbmask ) {
 	//lhs._odata[lo+o]=rhs._odata[ro+o];
 	vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
-	
       }
 
     }
@@ -217,8 +216,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int
 {
   int sshift[2];
 
-  sshift[0] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,0);
-  sshift[1] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,1);
+  sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
+  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
 
   if ( sshift[0] == sshift[1] ) {
     Cshift_local(ret,rhs,dimension,shift,0x3);
@@ -239,8 +238,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj>
   // Map to always positive shift modulo global full dimension.
   shift = (shift+fd)%fd;
 
-  ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift);
-        
+  ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
   // the permute type
   int permute_dim =grid->PermuteDim(dimension);
   int permute_type=grid->PermuteType(dimension);
@@ -250,11 +248,11 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj>
     int o   = 0;
     int bo  = x * grid->_ostride[dimension];
     
-    int cb= (cbmask==0x2)? 1 : 0;
+    int cb= (cbmask==0x2)? Odd : Even;
 
-    int sshift = grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
+    int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
     int sx     = (x+sshift)%rd;
-	
+
     int permute_slice=0;
     if(permute_dim){
       int wrap = sshift/rd;
diff --git a/lib/cshift/Grid_cshift_mpi.h b/lib/cshift/Grid_cshift_mpi.h
index 569017d8..8c0badcd 100644
--- a/lib/cshift/Grid_cshift_mpi.h
+++ b/lib/cshift/Grid_cshift_mpi.h
@@ -39,8 +39,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int
 {
   int sshift[2];
 
-  sshift[0] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,0);
-  sshift[1] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,1);
+  sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
+  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
 
   if ( sshift[0] == sshift[1] ) {
     Cshift_comms(ret,rhs,dimension,shift,0x3);
@@ -54,8 +54,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rh
 {
   int sshift[2];
 
-  sshift[0] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,0);
-  sshift[1] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,1);
+  sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
+  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
 
   if ( sshift[0] == sshift[1] ) {
     Cshift_comms_simd(ret,rhs,dimension,shift,0x3);
@@ -87,8 +87,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int
   std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
   std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
 
-  int cb= (cbmask==0x2)? 1 : 0;
-  int sshift= rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
+  int cb= (cbmask==0x2)? Odd : Even;
+  int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 
   for(int x=0;x<rd;x++){       
 
@@ -162,8 +162,8 @@ template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &r
   ///////////////////////////////////////////
   // Work out what to send where
   ///////////////////////////////////////////
-  int cb    = (cbmask==0x2)? 1 : 0;
-  int sshift= grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
+  int cb    = (cbmask==0x2)? Odd : Even;
+  int sshift= grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
 
   // loop over outer coord planes orthog to dim
   for(int x=0;x<rd;x++){       
diff --git a/lib/cshift/Grid_cshift_none.h b/lib/cshift/Grid_cshift_none.h
index 3485a63e..be9467b1 100644
--- a/lib/cshift/Grid_cshift_none.h
+++ b/lib/cshift/Grid_cshift_none.h
@@ -4,7 +4,7 @@ namespace Grid {
 template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 {
   Lattice<vobj> ret(rhs._grid);
-  ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift);
+  ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
   Cshift_local(ret,rhs,dimension,shift);
   return ret;
 }
diff --git a/lib/lattice/Grid_lattice_peekpoke.h b/lib/lattice/Grid_lattice_peekpoke.h
index 7bffdbb8..9416226a 100644
--- a/lib/lattice/Grid_lattice_peekpoke.h
+++ b/lib/lattice/Grid_lattice_peekpoke.h
@@ -116,7 +116,7 @@ PARALLEL_FOR_LOOP
 
       int Nsimd = grid->Nsimd();
 
-      assert( l.checkerboard== l._grid->CheckerBoard(site));
+      assert( l.checkerboard == l._grid->CheckerBoard(site));
       assert( sizeof(sobj)*Nsimd == sizeof(vobj));
 
       int rank,odx,idx;
diff --git a/lib/lattice/Grid_lattice_transfer.h b/lib/lattice/Grid_lattice_transfer.h
index 27ae27e1..f0046b0e 100644
--- a/lib/lattice/Grid_lattice_transfer.h
+++ b/lib/lattice/Grid_lattice_transfer.h
@@ -32,7 +32,6 @@ PARALLEL_FOR_LOOP
       cbos=half._grid->CheckerBoard(coor);
       
       if (cbos==cb) {
-	
 	half._odata[ssh] = full._odata[ss];
 	ssh++;
       }
@@ -45,7 +44,7 @@ PARALLEL_FOR_LOOP
     for(int ss=0;ss<full._grid->oSites();ss++){
       std::vector<int> coor;
       int cbos;
-      
+
       full._grid->oCoorFromOindex(coor,ss);
       cbos=half._grid->CheckerBoard(coor);
       
diff --git a/lib/qcd/Grid_qcd_dirac.cc b/lib/qcd/Dirac.cc
similarity index 100%
rename from lib/qcd/Grid_qcd_dirac.cc
rename to lib/qcd/Dirac.cc
diff --git a/lib/qcd/Grid_qcd_dirac.h b/lib/qcd/Dirac.h
similarity index 100%
rename from lib/qcd/Grid_qcd_dirac.h
rename to lib/qcd/Dirac.h
diff --git a/lib/qcd/Grid_qcd_wilson_dop.cc b/lib/qcd/Grid_qcd_wilson_dop.cc
deleted file mode 100644
index 9a3f5f6a..00000000
--- a/lib/qcd/Grid_qcd_wilson_dop.cc
+++ /dev/null
@@ -1,217 +0,0 @@
-#include <Grid.h>
-
-namespace Grid {
-namespace QCD {
-
-const std::vector<int> WilsonMatrix::directions   ({0,1,2,3, 0, 1, 2, 3});
-const std::vector<int> WilsonMatrix::displacements({1,1,1,1,-1,-1,-1,-1});
-
-  int WilsonMatrix::HandOptDslash;
-
-  class WilsonCompressor {
-  public:
-    int mu;
-    int dag;
-
-    WilsonCompressor(int _dag){
-      mu=0;
-      dag=_dag;
-      assert((dag==0)||(dag==1));
-    }
-    void Point(int p) { 
-      mu=p;
-    };
-
-    vHalfSpinColourVector operator () (const vSpinColourVector &in)
-    {
-      vHalfSpinColourVector ret;
-      int mudag=mu;
-      if (dag) {
-	mudag=(mu+Nd)%(2*Nd);
-      }
-      switch(mudag) {
-      case Xp:
-	spProjXp(ret,in);
-	break;
-      case Yp:
-	spProjYp(ret,in);
-	break;
-      case Zp:
-	spProjZp(ret,in);
-	break;
-      case Tp:
-	spProjTp(ret,in);
-	break;
-      case Xm:
-	spProjXm(ret,in);
-	break;
-      case Ym:
-	spProjYm(ret,in);
-	break;
-      case Zm:
-	spProjZm(ret,in);
-	break;
-      case Tm:
-	spProjTm(ret,in);
-	break;
-      default: 
-	assert(0);
-	break;
-      }
-      return ret;
-    }
-  };
-
-  WilsonMatrix::WilsonMatrix(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid, double _mass)  : 
-    CheckerBoardedSparseMatrixBase<LatticeFermion>(&Fgrid,&Hgrid),
-    Stencil    (  _grid,npoint,Even,directions,displacements),
-    StencilEven(_cbgrid,npoint,Even,directions,displacements), // source is Even
-    StencilOdd (_cbgrid,npoint,Odd ,directions,displacements), // source is Odd
-    mass(_mass),
-    Umu(_grid),
-    UmuEven(_cbgrid),
-    UmuOdd (_cbgrid)
-  {
-    // Allocate the required comms buffer
-    comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO
-    
-    DoubleStore(Umu,_Umu);
-    pickCheckerboard(Even,UmuEven,Umu);
-    pickCheckerboard(Odd ,UmuOdd,Umu);
-  }
-      
-void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
-{
-  LatticeColourMatrix U(_grid);
-
-  for(int mu=0;mu<Nd;mu++){
-    U = peekIndex<LorentzIndex>(Umu,mu);
-    pokeIndex<LorentzIndex>(Uds,U,mu);
-    U = adj(Cshift(U,mu,-1));
-    pokeIndex<LorentzIndex>(Uds,U,mu+4);
-  }
-}
-
-RealD WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard=in.checkerboard;
-  Dhop(in,out,DaggerNo);
-  out = (4+mass)*in - 0.5*out  ; // FIXME : axpby_norm! fusion fun
-  return norm2(out);
-}
-RealD WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard=in.checkerboard;
-  Dhop(in,out,DaggerYes);
-  out = (4+mass)*in - 0.5*out  ; // FIXME : axpby_norm! fusion fun
-  return norm2(out);
-}
-
-void WilsonMatrix::Meooe(const LatticeFermion &in, LatticeFermion &out)
-{
-  if ( in.checkerboard == Odd ) {
-    DhopEO(in,out,DaggerNo);
-  } else {
-    DhopOE(in,out,DaggerNo);
-  }
-  out = (-0.5)*out; // FIXME : scale factor in Dhop
-}
-void WilsonMatrix::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  if ( in.checkerboard == Odd ) {
-    DhopEO(in,out,DaggerYes);
-  } else {
-    DhopOE(in,out,DaggerYes);
-  }
-  out = (-0.5)*out; // FIXME : scale factor in Dhop
-}
-void WilsonMatrix::Mooee(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  out = (4.0+mass)*in;
-  return ;
-}
-void WilsonMatrix::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  Mooee(in,out);
-}
-void WilsonMatrix::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  out = (1.0/(4.0+mass))*in;
-  return ;
-}
-void WilsonMatrix::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  MooeeInv(in,out);
-}
-
-void WilsonMatrix::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
-				const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  assert((dag==DaggerNo) ||(dag==DaggerYes));
-  WilsonCompressor compressor(dag);
-  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
-
-  if ( dag == DaggerYes ) {
-    if( HandOptDslash ) {
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOptHand::DhopSiteDag(st,U,comm_buf,sss,in,out);
-      }
-    } else { 
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOpt::DhopSiteDag(st,U,comm_buf,sss,in,out);
-      }
-    }
-  } else {
-    if( HandOptDslash ) {
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOptHand::DhopSite(st,U,comm_buf,sss,in,out);
-      }
-    } else { 
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOpt::DhopSite(st,U,comm_buf,sss,in,out);
-      }
-    }
-  }
-}
-void WilsonMatrix::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  conformable(in._grid,_cbgrid);    // verifies half grid
-  conformable(in._grid,out._grid); // drops the cb check
-
-  assert(in.checkerboard==Even);
-  out.checkerboard = Odd;
-
-  DhopInternal(StencilEven,UmuOdd,in,out,dag);
-}
-void WilsonMatrix::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  conformable(in._grid,_cbgrid);    // verifies half grid
-  conformable(in._grid,out._grid); // drops the cb check
-
-  assert(in.checkerboard==Odd);
-  out.checkerboard = Even;
-
-  DhopInternal(StencilOdd,UmuEven,in,out,dag);
-}
-void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  conformable(in._grid,_grid); // verifies full grid
-  conformable(in._grid,out._grid);
-
-  out.checkerboard = in.checkerboard;
-
-  DhopInternal(Stencil,Umu,in,out,dag);
-}
-
-}}
-
-
-
diff --git a/lib/qcd/Grid_qcd_wilson_dop.h b/lib/qcd/Grid_qcd_wilson_dop.h
deleted file mode 100644
index 87418603..00000000
--- a/lib/qcd/Grid_qcd_wilson_dop.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef  GRID_QCD_WILSON_DOP_H
-#define  GRID_QCD_WILSON_DOP_H
-
-
-namespace Grid {
-
-  namespace QCD {
-
-  // Should be in header?
-    const int Xp = 0;
-    const int Yp = 1;
-    const int Zp = 2;
-    const int Tp = 3;
-    const int Xm = 4;
-    const int Ym = 5;
-    const int Zm = 6;
-    const int Tm = 7;
-
-    class WilsonMatrix : public CheckerBoardedSparseMatrixBase<LatticeFermion>
-    {
-      //NB r=1;
-    public:
-      static int HandOptDslash;
-
-      double                        mass;
-      //      GridBase                     *    grid; // Inherited
-      //      GridBase                     *  cbgrid;
-
-      //Defines the stencils for even and odd
-      CartesianStencil Stencil; 
-      CartesianStencil StencilEven; 
-      CartesianStencil StencilOdd; 
-
-      // Copy of the gauge field , with even and odd subsets
-      LatticeDoubledGaugeField Umu;
-      LatticeDoubledGaugeField UmuEven;
-      LatticeDoubledGaugeField UmuOdd;
-
-      static const int npoint=8;
-      static const std::vector<int> directions   ;
-      static const std::vector<int> displacements;
-      static const int Xp,Xm,Yp,Ym,Zp,Zm,Tp,Tm;
-
-      // Comms buffer
-      std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  comm_buf;
-
-      // Constructor
-      WilsonMatrix(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,double _mass);
-
-      // DoubleStore
-      void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
-
-      // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
-
-      // half checkerboard operaions
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
-
-      // non-hermitian hopping term; half cb or both
-      void Dhop  (const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField &U,
-			const LatticeFermion &in, LatticeFermion &out,int dag);
-
-      typedef iScalar<iMatrix<vComplex, Nc> > matrix;
-
-      
-    };
-
-
-    class DiracOpt {
-    public:
-      // These ones will need to be package intelligently. WilsonType base class
-      // for use by DWF etc..
-      static void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-		    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-		    int ss,const LatticeFermion &in, LatticeFermion &out);
-      static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-		       std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-		       int ss,const LatticeFermion &in, LatticeFermion &out);
-
-    };
-    class DiracOptHand {
-    public:
-      // These ones will need to be package intelligently. WilsonType base class
-      // for use by DWF etc..
-      static void DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-		    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-		    int ss,const LatticeFermion &in, LatticeFermion &out);
-      static void DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-		       std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-		       int ss,const LatticeFermion &in, LatticeFermion &out);
-
-    };
-
-  }
-}
-#endif
diff --git a/lib/qcd/Grid_qcd.h b/lib/qcd/QCD.h
similarity index 97%
rename from lib/qcd/Grid_qcd.h
rename to lib/qcd/QCD.h
index 959f3529..7c45eb23 100644
--- a/lib/qcd/Grid_qcd.h
+++ b/lib/qcd/QCD.h
@@ -4,6 +4,16 @@ namespace Grid{
 
 namespace QCD {
 
+
+    static const int Xp = 0;
+    static const int Yp = 1;
+    static const int Zp = 2;
+    static const int Tp = 3;
+    static const int Xm = 4;
+    static const int Ym = 5;
+    static const int Zm = 6;
+    static const int Tm = 7;
+
     static const int Nc=3;
     static const int Ns=4;
     static const int Nd=4;
@@ -297,9 +307,8 @@ namespace QCD {
 }   //namespace QCD
 } // Grid
 
-#include <qcd/Grid_qcd_dirac.h>
-#include <qcd/Grid_qcd_2spinor.h>
-//#include <qcd/Grid_qcd_pauli.h>
-#include <qcd/Grid_qcd_wilson_dop.h>
+#include <qcd/Dirac.h>
+#include <qcd/TwoSpinor.h>
+#include <qcd/action/Actions.h>
 
 #endif
diff --git a/lib/qcd/Grid_qcd_2spinor.h b/lib/qcd/TwoSpinor.h
similarity index 100%
rename from lib/qcd/Grid_qcd_2spinor.h
rename to lib/qcd/TwoSpinor.h
diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
new file mode 100644
index 00000000..c4e8a2f0
--- /dev/null
+++ b/lib/qcd/action/Actions.h
@@ -0,0 +1,10 @@
+#ifndef GRID_QCD_ACTIONS_H
+#define GRID_QCD_ACTIONS_H
+
+#include <qcd/action/fermion/FermionAction.h>
+#include <qcd/action/fermion/WilsonCompressor.h>
+#include <qcd/action/fermion/WilsonKernels.h>
+#include <qcd/action/fermion/WilsonFermion.h>
+#include <qcd/action/fermion/FiveDimWilsonFermion.h>
+
+#endif
diff --git a/lib/qcd/action/fermion/FermionAction.h b/lib/qcd/action/fermion/FermionAction.h
new file mode 100644
index 00000000..1b05174b
--- /dev/null
+++ b/lib/qcd/action/fermion/FermionAction.h
@@ -0,0 +1,47 @@
+#ifndef  GRID_QCD_WILSON_DOP_H
+#define  GRID_QCD_WILSON_DOP_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    //////////////////////////////////////////////////////////////////////////////
+    // Four component fermions
+    // Should type template the vector and gauge types
+    // Think about multiple representations
+    //////////////////////////////////////////////////////////////////////////////
+    template<class FermionField,class GaugeField>
+    class FermionAction : public CheckerBoardedSparseMatrixBase<FermionField>
+    {
+    public:
+
+      GridBase * Grid(void)   { return FermionGrid(); };   // this is all the linalg routines need to know
+      GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
+
+      virtual GridBase *FermionGrid(void)         =0;
+      virtual GridBase *FermionRedBlackGrid(void) =0;
+      virtual GridBase *GaugeGrid(void)           =0;
+      virtual GridBase *GaugeRedBlackGrid(void)   =0;
+
+      // override multiply
+      virtual RealD  M    (const FermionField &in, FermionField &out)=0;
+      virtual RealD  Mdag (const FermionField &in, FermionField &out)=0;
+
+      // half checkerboard operations
+      virtual void   Meooe       (const FermionField &in, FermionField &out)=0;
+      virtual void   MeooeDag    (const FermionField &in, FermionField &out)=0;
+      virtual void   Mooee       (const FermionField &in, FermionField &out)=0;
+      virtual void   MooeeDag    (const FermionField &in, FermionField &out)=0;
+      virtual void   MooeeInv    (const FermionField &in, FermionField &out)=0;
+      virtual void   MooeeInvDag (const FermionField &in, FermionField &out)=0;
+
+      // non-hermitian hopping term; half cb or both
+      virtual void Dhop  (const FermionField &in, FermionField &out,int dag)=0;
+      virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
+      virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
+
+    };
+
+  }
+}
+#endif
diff --git a/lib/qcd/action/fermion/FiveDimWilsonFermion.cc b/lib/qcd/action/fermion/FiveDimWilsonFermion.cc
new file mode 100644
index 00000000..43645899
--- /dev/null
+++ b/lib/qcd/action/fermion/FiveDimWilsonFermion.cc
@@ -0,0 +1,228 @@
+#include <Grid.h>
+
+namespace Grid {
+namespace QCD {
+  
+  // S-direction is INNERMOST and takes no part in the parity.
+  const std::vector<int> FiveDimWilsonFermion::directions   ({1,2,3,4, 1, 2, 3, 4}); // 5d dimension per stencil point; dimension 0 (Ls) is never hopped
+  const std::vector<int> FiveDimWilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1}); // +1 for the first four points, -1 for the last four
+
+  int FiveDimWilsonFermion::HandOptDslash; // static storage, so zero unless set; non-zero selects DiracOptHand in DhopInternal
+
+  // 5d lattice for DWF: builds the 5d stencils, asserts the 5d/4d grid layouts agree, and double-stores the 4d gauge field.
+  FiveDimWilsonFermion::FiveDimWilsonFermion(LatticeGaugeField &_Umu,
+					   GridCartesian         &FiveDimGrid,
+					   GridRedBlackCartesian &FiveDimRedBlackGrid,
+					   GridCartesian         &FourDimGrid,
+					   GridRedBlackCartesian &FourDimRedBlackGrid,
+					   double _mass) :
+  _FiveDimGrid(&FiveDimGrid), // members are initialised in class declaration order, not in the order listed here
+  _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
+  _FourDimGrid(&FourDimGrid),
+  _FourDimRedBlackGrid(&FourDimRedBlackGrid),
+  Stencil    (_FiveDimGrid,npoint,Even,directions,displacements),
+  StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
+  StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
+  mass(_mass), // NOTE(review): M5 is never initialised here or in the body, yet M/Mdag/Mooee/MooeeInv read it -- confirm intended value
+  Umu(_FourDimGrid),
+  UmuEven(_FourDimRedBlackGrid),
+  UmuOdd (_FourDimRedBlackGrid),
+  Lebesgue(_FourDimGrid),
+  LebesgueEvenOdd(_FourDimRedBlackGrid)
+{
+  // some assertions: dimensionality of each grid handed in
+  assert(FiveDimGrid._ndimension==5);
+  assert(FourDimGrid._ndimension==4);
+  
+  assert(FiveDimRedBlackGrid._ndimension==5);
+  assert(FourDimRedBlackGrid._ndimension==4);
+
+  assert(FiveDimRedBlackGrid._checker_dim==1); // presumably the dimension used for checkerboarding; must not be the Ls dimension (0)
+
+  // Dimension zero of the five-d is the Ls direction; it must not be split over processors or SIMD lanes
+  Ls=FiveDimGrid._fdimensions[0];
+  assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
+  assert(FiveDimRedBlackGrid._processors[0] ==1);
+  assert(FiveDimRedBlackGrid._simd_layout[0]==1);
+  assert(FiveDimGrid._processors[0]         ==1);
+  assert(FiveDimGrid._simd_layout[0]        ==1);
+
+  // Other dimensions must match the decomposition of the four-D fields 
+  for(int d=0;d<4;d++){
+    assert(FourDimRedBlackGrid._fdimensions[d]  ==FourDimGrid._fdimensions[d]);
+    assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
+
+    assert(FourDimRedBlackGrid._processors[d]   ==FourDimGrid._processors[d]);
+    assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
+
+    assert(FourDimRedBlackGrid._simd_layout[d]  ==FourDimGrid._simd_layout[d]);
+    assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
+
+    assert(FiveDimGrid._fdimensions[d+1]        ==FourDimGrid._fdimensions[d]);
+    assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
+    assert(FiveDimGrid._simd_layout[d+1]        ==FourDimGrid._simd_layout[d]);
+  }
+
+  // Allocate the required comms buffer
+  comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO
+  
+  DoubleStore(Umu,_Umu);              // fill the doubled 4d gauge field from the caller's links
+  pickCheckerboard(Even,UmuEven,Umu); // even subset of the doubled links
+  pickCheckerboard(Odd ,UmuOdd,Umu);  // odd subset of the doubled links
+}
+// Fill Uds from Umu: Lorentz slots [0,Nd) get U_mu, slots [Nd,2*Nd) get adj(Cshift(U_mu,mu,-1)).
+void FiveDimWilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu) {
+  conformable(Uds._grid,GaugeGrid());   // both fields must live on the gauge grid
+  conformable(Umu._grid,GaugeGrid());
+  LatticeColourMatrix U(GaugeGrid());
+  for(int mu=0;mu<Nd;mu++){
+    U = peekIndex<LorentzIndex>(Umu,mu);
+    pokeIndex<LorentzIndex>(Uds,U,mu);
+    U = adj(Cshift(U,mu,-1));
+    pokeIndex<LorentzIndex>(Uds,U,mu+Nd); // was mu+4: keep the offset tied to the loop bound Nd
+  }
+}
+
+// Full-grid operator: hopping term, then axpy_norm with coefficient (5-M5); axpy_norm's result is returned.
+RealD FiveDimWilsonFermion::M(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  Dhop(in, out, DaggerNo);
+  return axpy_norm(out, 5.0-M5, in, out);
+}
+// Daggered full-grid operator: same as M but the hopping term is applied with DaggerYes.
+RealD FiveDimWilsonFermion::Mdag(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  Dhop(in, out, DaggerYes);
+  return axpy_norm(out, 5.0-M5, in, out);
+}
+// Off-diagonal checkerboard term: dispatch on the parity of the source field.
+void FiveDimWilsonFermion::Meooe(const LatticeFermion &in, LatticeFermion &out) {
+  if ( in.checkerboard != Odd ) {
+    DhopOE(in, out, DaggerNo);   // non-Odd source
+    return;
+  }
+  DhopEO(in, out, DaggerNo);     // Odd source
+}
+// Daggered off-diagonal checkerboard term: dispatch on the parity of the source field.
+void FiveDimWilsonFermion::MeooeDag(const LatticeFermion &in, LatticeFermion &out) {
+  if ( in.checkerboard != Odd ) {
+    DhopOE(in, out, DaggerYes);   // non-Odd source
+    return;
+  }
+  DhopEO(in, out, DaggerYes);     // Odd source
+}
+// Diagonal term: out = (5-M5)*in, carrying over the checkerboard of the source.
+void FiveDimWilsonFermion::Mooee(const LatticeFermion &in, LatticeFermion &out) {
+  const double diag = 5.0-M5;
+  out.checkerboard = in.checkerboard;
+  out = diag*in;
+}
+// Dagger of the diagonal term; forwards to Mooee.
+void FiveDimWilsonFermion::MooeeDag(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  Mooee(in, out);
+}
+// Inverse of the diagonal term: out = (1/(5-M5))*in.
+void FiveDimWilsonFermion::MooeeInv(const LatticeFermion &in, LatticeFermion &out) {
+  const double inv_diag = 1.0/(5.0-M5);
+  out.checkerboard = in.checkerboard;
+  out = inv_diag*in;
+}
+// Dagger of the inverse diagonal term; forwards to MooeeInv.
+void FiveDimWilsonFermion::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  MooeeInv(in, out);
+}
+// Apply the Wilson hopping term to a 5d fermion field using the 4d doubled gauge field U.
+// st: stencil matching the source field; lo: site reordering for U's grid; dag: DaggerNo or DaggerYes.
+void FiveDimWilsonFermion::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
+					LatticeDoubledGaugeField & U,
+					const LatticeFermion &in, LatticeFermion &out,int dag)
+{
+  assert((dag==DaggerNo) ||(dag==DaggerYes));
+
+  WilsonCompressor compressor(dag);
+
+  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
+  
+  // Dhop takes the 4d grid from U, and makes a 5d index for fermion: sF = s + Ls*sU.
+  // Note loop ordering and data layout: every branch threads over the 4d sites (outer loop)
+  // and each thread walks the Ls slices of its site (inner loop). Designed to create
+  // - per thread reuse in L1 cache for U
+  // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
+  if ( dag == DaggerYes ) {
+    if( HandOptDslash ) {
+PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=lo.Reorder(ss);
+	for(int s=0;s<Ls;s++){
+	  int sF = s+Ls*sU;
+	  DiracOptHand::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
+	}
+      }
+    } else { 
+PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=lo.Reorder(ss);
+	for(int s=0;s<Ls;s++){
+	  int sF = s+Ls*sU;
+	  DiracOpt::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
+	}
+      }
+    }
+  } else {
+    if( HandOptDslash ) {
+PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=lo.Reorder(ss);
+	for(int s=0;s<Ls;s++){
+	  int sF = s+Ls*sU;
+	  DiracOptHand::DhopSite(st,U,comm_buf,sF,sU,in,out);
+	}
+      }
+    } else { 
+PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
+	int sU=lo.Reorder(ss);
+	for(int s=0;s<Ls;s++){
+	  int sF = s+Ls*sU; 
+	  DiracOpt::DhopSite(st,U,comm_buf,sF,sU,in,out);
+	}
+      }
+    }
+  }
+}
+// Hopping term from an Even source into an Odd result, on the red-black fermion grid.
+void FiveDimWilsonFermion::DhopOE(const LatticeFermion &in, LatticeFermion &out, int dag) {
+  // Source must be on the half grid, and the result on the same grid as the source.
+  conformable(in._grid, FermionRedBlackGrid());
+  conformable(in._grid, out._grid);   // grids only; drops the cb check
+  assert(in.checkerboard == Even);
+
+  out.checkerboard = Odd;
+  DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
+}
+// Hopping term from an Odd source into an Even result, on the red-black fermion grid.
+void FiveDimWilsonFermion::DhopEO(const LatticeFermion &in, LatticeFermion &out, int dag) {
+  // Source must be on the half grid, and the result on the same grid as the source.
+  conformable(in._grid, FermionRedBlackGrid());
+  conformable(in._grid, out._grid);   // grids only; drops the cb check
+  assert(in.checkerboard == Odd);
+
+  out.checkerboard = Even;
+  DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
+}
+// Hopping term on the full (non-checkerboarded) fermion grid.
+void FiveDimWilsonFermion::Dhop(const LatticeFermion &in, LatticeFermion &out, int dag) {
+  conformable(in._grid, FermionGrid());   // verifies full grid
+  conformable(in._grid, out._grid);
+
+  // The result carries the same checkerboard tag as the source.
+  out.checkerboard = in.checkerboard;
+  DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
+}
+
+}}
+
+
+
diff --git a/lib/qcd/action/fermion/FiveDimWilsonFermion.h b/lib/qcd/action/fermion/FiveDimWilsonFermion.h
new file mode 100644
index 00000000..3aa85023
--- /dev/null
+++ b/lib/qcd/action/fermion/FiveDimWilsonFermion.h
@@ -0,0 +1,108 @@
+#ifndef  GRID_QCD_DWF_H
+#define  GRID_QCD_DWF_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // This is the 4d red black case, appropriate to support generalised
+    // five dim fermions like mobius, zolotarev etc.:
+    //
+    //   parity = (x+y+z+t) % 2;
+    //
+    // i.e. even even contains fifth dim hopping term.
+    //
+    // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s) % 2 ]
+    ////////////////////////////////////////////////////////////////////////////////
+    class FiveDimWilsonFermion : public FermionAction<LatticeFermion,LatticeGaugeField>
+    {
+    public:
+      // ------------------------------------------------------------------------
+      // FermionAction interface: gauge fields use the 4d grids, fermions the 5d grids
+      // ------------------------------------------------------------------------
+      GridBase *GaugeGrid()           { return _FourDimGrid; }
+      GridBase *GaugeRedBlackGrid()   { return _FourDimRedBlackGrid; }
+      GridBase *FermionGrid()         { return _FiveDimGrid; }
+      GridBase *FermionRedBlackGrid() { return _FiveDimRedBlackGrid; }
+
+      // full-grid operator and its dagger
+      virtual RealD M   (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD Mdag(const LatticeFermion &in, LatticeFermion &out);
+
+      // half checkerboard operations
+      virtual void Meooe      (const LatticeFermion &in, LatticeFermion &out);
+      virtual void MeooeDag   (const LatticeFermion &in, LatticeFermion &out);
+      virtual void Mooee      (const LatticeFermion &in, LatticeFermion &out);
+      virtual void MooeeDag   (const LatticeFermion &in, LatticeFermion &out);
+      virtual void MooeeInv   (const LatticeFermion &in, LatticeFermion &out);
+      virtual void MooeeInvDag(const LatticeFermion &in, LatticeFermion &out);
+
+      // non-hermitian hopping term; half cb or both
+      void Dhop  (const LatticeFermion &in, LatticeFermion &out, int dag);
+      void DhopOE(const LatticeFermion &in, LatticeFermion &out, int dag);
+      void DhopEO(const LatticeFermion &in, LatticeFermion &out, int dag);
+
+      // ------------------------------------------------------------------------
+      // Methods beyond the base interface
+      // ------------------------------------------------------------------------
+      void DhopInternal(CartesianStencil &st,          // stencil matching the source field
+                        LebesgueOrder &lo,             // site reordering for U's grid
+                        LatticeDoubledGaugeField &U,   // doubled 4d gauge field
+                        const LatticeFermion &in,
+                        LatticeFermion &out,
+                        int dag);                      // DaggerNo or DaggerYes
+
+      // Constructor: takes the gauge field plus the four grids it is checked against
+      FiveDimWilsonFermion(LatticeGaugeField &_Umu,
+                           GridCartesian         &FiveDimGrid,
+                           GridRedBlackCartesian &FiveDimRedBlackGrid,
+                           GridCartesian         &FourDimGrid,
+                           GridRedBlackCartesian &FourDimRedBlackGrid,
+                           double _mass);
+
+      // Fill a doubled gauge field from a plain one
+      void DoubleStore(LatticeDoubledGaugeField &Uds, const LatticeGaugeField &Umu);
+
+      // ------------------------------------------------------------------------
+      // Data members required to support the functionality
+      // ------------------------------------------------------------------------
+      static int HandOptDslash; // these are a temporary hack
+
+    protected:
+
+      // Grids: 4d for the gauge field, 5d for the fermions
+      GridBase *_FourDimGrid;
+      GridBase *_FourDimRedBlackGrid;
+      GridBase *_FiveDimGrid;
+      GridBase *_FiveDimRedBlackGrid;
+
+      static const int npoint=8;                    // number of stencil points
+      static const std::vector<int> directions;
+      static const std::vector<int> displacements;
+
+      double M5;    // enters the diagonal term as (5-M5)
+      double mass;  // stored from the constructor argument
+      int    Ls;    // extent of dimension 0 of the 5d grid
+
+      // Stencils for the full grid and for even / odd sources
+      CartesianStencil Stencil;
+      CartesianStencil StencilEven;
+      CartesianStencil StencilOdd;
+
+      // Copy of the gauge field, with even and odd subsets
+      LatticeDoubledGaugeField Umu;
+      LatticeDoubledGaugeField UmuEven;
+      LatticeDoubledGaugeField UmuOdd;
+
+      LebesgueOrder Lebesgue;         // built on the 4d full grid
+      LebesgueOrder LebesgueEvenOdd;  // built on the 4d red-black grid
+
+      // Comms buffer shared by all stencils
+      std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
+
+    };
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h
new file mode 100644
index 00000000..d9fe977a
--- /dev/null
+++ b/lib/qcd/action/fermion/WilsonCompressor.h
@@ -0,0 +1,61 @@
+#ifndef  GRID_QCD_WILSON_COMPRESSOR_H
+#define  GRID_QCD_WILSON_COMPRESSOR_H
+
+namespace Grid {
+namespace QCD {
+
+  // Compressor passed to the stencil halo exchange: spin-projects a full
+  // spinor to a half spinor for the stencil point selected through Point().
+  class WilsonCompressor {
+  public:
+    int mu;   // stencil point, set through Point()
+    int dag;  // non-zero when the daggered operator is being applied
+
+    // _dag must be 0 or 1 (asserted).
+    WilsonCompressor(int _dag) : mu(0), dag(_dag) {
+      assert((dag==0)||(dag==1));
+    }
+
+    // Select the stencil point used by later operator() calls.
+    void Point(int p) { mu=p; }
+
+    // Project `in` for the selected point; with dag set, the point is first
+    // swapped with its opposite-displacement partner, (mu+Nd)%(2*Nd).
+    vHalfSpinColourVector operator () (const vSpinColourVector &in)
+    {
+      vHalfSpinColourVector ret;
+      const int mudag = dag ? (mu+Nd)%(2*Nd) : mu;
+      switch(mudag) {
+      case Xp:
+	spProjXp(ret,in);
+	break;
+      case Yp:
+	spProjYp(ret,in);
+	break;
+      case Zp:
+	spProjZp(ret,in);
+	break;
+      case Tp:
+	spProjTp(ret,in);
+	break;
+      case Xm:
+	spProjXm(ret,in);
+	break;
+      case Ym:
+	spProjYm(ret,in);
+	break;
+      case Zm:
+	spProjZm(ret,in);
+	break;
+      case Tm:
+	spProjTm(ret,in);
+	break;
+      default:
+	assert(0); // not one of the eight stencil points
+	break;
+      }
+      return ret;
+    }
+  };
+}} // namespace close
+#endif
diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc
new file mode 100644
index 00000000..aa30a7fa
--- /dev/null
+++ b/lib/qcd/action/fermion/WilsonFermion.cc
@@ -0,0 +1,163 @@
+#include <Grid.h>
+
+namespace Grid {
+namespace QCD {
+
+const std::vector<int> WilsonFermion::directions   ({0,1,2,3, 0, 1, 2, 3}); // lattice dimension of each of the eight stencil points
+const std::vector<int> WilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1}); // +1 for the first four points, -1 for the last four
+
+int WilsonFermion::HandOptDslash; // static storage, so zero unless set; non-zero selects DiracOptHand in DhopInternal
+
+// Build the 4d Wilson operator: stencils on Fgrid/Hgrid plus a double-stored copy of _Umu.
+WilsonFermion::WilsonFermion(LatticeGaugeField &_Umu, GridCartesian &Fgrid,
+                             GridRedBlackCartesian &Hgrid, double _mass)
+  : mass(_mass),                 // initialisers listed in member declaration order
+    _grid(&Fgrid),
+    _cbgrid(&Hgrid),
+    Stencil    (&Fgrid, npoint, Even, directions, displacements),
+    StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even
+    StencilOdd (&Hgrid, npoint, Odd , directions, displacements), // source is Odd
+    Umu    (&Fgrid),
+    UmuEven(&Hgrid),
+    UmuOdd (&Hgrid)
+{
+  // Comms buffer sized from the full-grid stencil; this is always big enough to contain EO.
+  comm_buf.resize(Stencil._unified_buffer_size);
+
+  DoubleStore(Umu, _Umu);
+  pickCheckerboard(Even, UmuEven, Umu);   // even subset of the doubled links
+  pickCheckerboard(Odd , UmuOdd , Umu);   // odd subset of the doubled links
+}
+      
+// Fill Uds from Umu: Lorentz slots [0,Nd) get U_mu, slots [Nd,2*Nd) get adj(Cshift(U_mu,mu,-1)).
+void WilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu) {
+  conformable(Uds._grid,GaugeGrid());   // both fields must live on the gauge grid
+  conformable(Umu._grid,GaugeGrid());
+  LatticeColourMatrix U(GaugeGrid());
+  for(int mu=0;mu<Nd;mu++){
+    U = peekIndex<LorentzIndex>(Umu,mu);
+    pokeIndex<LorentzIndex>(Uds,U,mu);
+    U = adj(Cshift(U,mu,-1));
+    pokeIndex<LorentzIndex>(Uds,U,mu+Nd); // was mu+4: keep the offset tied to the loop bound Nd
+  }
+}
+
+// Full-grid operator: hopping term, then axpy_norm with coefficient (4+mass); axpy_norm's result is returned.
+RealD WilsonFermion::M(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  Dhop(in, out, DaggerNo);
+  return axpy_norm(out, 4+mass, in, out);
+}
+// Daggered full-grid operator: same as M but the hopping term is applied with DaggerYes.
+RealD WilsonFermion::Mdag(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  Dhop(in, out, DaggerYes);
+  return axpy_norm(out, 4+mass, in, out);
+}
+
+// Off-diagonal checkerboard term: dispatch on the parity of the source field.
+void WilsonFermion::Meooe(const LatticeFermion &in, LatticeFermion &out) {
+  if ( in.checkerboard != Odd ) {
+    DhopOE(in, out, DaggerNo);   // non-Odd source
+    return;
+  }
+  DhopEO(in, out, DaggerNo);     // Odd source
+}
+// Daggered off-diagonal checkerboard term: dispatch on the parity of the source field.
+void WilsonFermion::MeooeDag(const LatticeFermion &in, LatticeFermion &out) {
+  if ( in.checkerboard != Odd ) {
+    DhopOE(in, out, DaggerYes);   // non-Odd source
+    return;
+  }
+  DhopEO(in, out, DaggerYes);     // Odd source
+}
+// Diagonal term: out = (4+mass)*in, carrying over the checkerboard of the source.
+void WilsonFermion::Mooee(const LatticeFermion &in, LatticeFermion &out) {
+  const double diag = 4.0+mass;
+  out.checkerboard = in.checkerboard;
+  out = diag*in;
+}
+// Dagger of the diagonal term; forwards to the (virtual) Mooee.
+void WilsonFermion::MooeeDag(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  Mooee(in, out);
+}
+// Inverse of the diagonal term: out = (1/(4+mass))*in.
+void WilsonFermion::MooeeInv(const LatticeFermion &in, LatticeFermion &out) {
+  const double inv_diag = 1.0/(4.0+mass);
+  out.checkerboard = in.checkerboard;
+  out = inv_diag*in;
+}
+// Dagger of the inverse diagonal term; forwards to the (virtual) MooeeInv.
+void WilsonFermion::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out) {
+  out.checkerboard = in.checkerboard;
+  MooeeInv(in, out);
+}
+
+// Halo-exchange the compressed source, then run the per-site kernel over every oSites() entry of in's grid.
+void WilsonFermion::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
+                                 const LatticeFermion &in, LatticeFermion &out,int dag)
+{
+  assert((dag==DaggerNo) ||(dag==DaggerYes));
+  WilsonCompressor compressor(dag);
+  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
+  if ( dag != DaggerYes ) {
+    if( !HandOptDslash ) {
+PARALLEL_FOR_LOOP
+      for(int site=0;site<in._grid->oSites();site++){
+        DiracOpt::DhopSite(st,U,comm_buf,site,site,in,out);
+      }
+    } else {
+PARALLEL_FOR_LOOP
+      for(int site=0;site<in._grid->oSites();site++){
+        DiracOptHand::DhopSite(st,U,comm_buf,site,site,in,out);
+      }
+    }
+  } else {
+    if( !HandOptDslash ) {
+PARALLEL_FOR_LOOP
+      for(int site=0;site<in._grid->oSites();site++){
+        DiracOpt::DhopSiteDag(st,U,comm_buf,site,site,in,out);
+      }
+    } else {
+PARALLEL_FOR_LOOP
+      for(int site=0;site<in._grid->oSites();site++){
+        DiracOptHand::DhopSiteDag(st,U,comm_buf,site,site,in,out);
+      }
+    }
+  }
+}
+// Hopping term from an Even source into an Odd result, on the checkerboarded grid.
+void WilsonFermion::DhopOE(const LatticeFermion &in, LatticeFermion &out, int dag) {
+  // Source must be on the half grid, and the result on the same grid as the source.
+  conformable(in._grid, _cbgrid);
+  conformable(in._grid, out._grid);   // grids only; drops the cb check
+  assert(in.checkerboard == Even);
+
+  out.checkerboard = Odd;
+  DhopInternal(StencilEven, UmuOdd, in, out, dag);
+}
+// Hopping term from an Odd source into an Even result, on the checkerboarded grid.
+void WilsonFermion::DhopEO(const LatticeFermion &in, LatticeFermion &out, int dag) {
+  // Source must be on the half grid, and the result on the same grid as the source.
+  conformable(in._grid, _cbgrid);
+  conformable(in._grid, out._grid);   // grids only; drops the cb check
+  assert(in.checkerboard == Odd);
+
+  out.checkerboard = Even;
+  DhopInternal(StencilOdd, UmuEven, in, out, dag);
+}
+// Hopping term on the full (non-checkerboarded) grid.
+void WilsonFermion::Dhop(const LatticeFermion &in, LatticeFermion &out, int dag) {
+  conformable(in._grid, _grid);   // verifies full grid
+  conformable(in._grid, out._grid);
+
+  // The result carries the same checkerboard tag as the source.
+  out.checkerboard = in.checkerboard;
+  DhopInternal(Stencil, Umu, in, out, dag);
+}
+
+}}
+
+
+
diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h
new file mode 100644
index 00000000..5c208131
--- /dev/null
+++ b/lib/qcd/action/fermion/WilsonFermion.h
@@ -0,0 +1,87 @@
+#ifndef  GRID_QCD_WILSON_FERMION_H
+#define  GRID_QCD_WILSON_FERMION_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    class WilsonFermion : public FermionAction<LatticeFermion,LatticeGaugeField>
+    {
+    public:
+
+      // ------------------------------------------------------------------------
+      // FermionAction interface: gauge and fermion fields share the same grids
+      // ------------------------------------------------------------------------
+      GridBase *GaugeGrid()           { return _grid; }
+      GridBase *GaugeRedBlackGrid()   { return _cbgrid; }
+      GridBase *FermionGrid()         { return _grid; }
+      GridBase *FermionRedBlackGrid() { return _cbgrid; }
+
+      // full-grid operator and its dagger
+      virtual RealD M   (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD Mdag(const LatticeFermion &in, LatticeFermion &out);
+
+      // half checkerboard operations
+      void Meooe   (const LatticeFermion &in, LatticeFermion &out);
+      void MeooeDag(const LatticeFermion &in, LatticeFermion &out);
+      virtual void Mooee      (const LatticeFermion &in, LatticeFermion &out); // remain virtual so we
+      virtual void MooeeDag   (const LatticeFermion &in, LatticeFermion &out); // can derive Clover
+      virtual void MooeeInv   (const LatticeFermion &in, LatticeFermion &out); // from Wilson base
+      virtual void MooeeInvDag(const LatticeFermion &in, LatticeFermion &out);
+
+      // non-hermitian hopping term; half cb or both
+      void Dhop  (const LatticeFermion &in, LatticeFermion &out, int dag);
+      void DhopOE(const LatticeFermion &in, LatticeFermion &out, int dag);
+      void DhopEO(const LatticeFermion &in, LatticeFermion &out, int dag);
+
+      // ------------------------------------------------------------------------
+      // Methods beyond the base interface
+      // ------------------------------------------------------------------------
+      void DhopInternal(CartesianStencil &st,          // stencil matching the source field
+                        LatticeDoubledGaugeField &U,   // doubled gauge field
+                        const LatticeFermion &in,
+                        LatticeFermion &out,
+                        int dag);                      // DaggerNo or DaggerYes
+
+      // Constructor: gauge field, full grid, checkerboarded grid, mass
+      WilsonFermion(LatticeGaugeField &_Umu, GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, double _mass);
+
+      // Fill a doubled gauge field from a plain one
+      void DoubleStore(LatticeDoubledGaugeField &Uds, const LatticeGaugeField &Umu);
+
+      // ------------------------------------------------------------------------
+      // Data members required to support the functionality
+      // ------------------------------------------------------------------------
+      static int HandOptDslash; // these are a temporary hack
+      static int MortonOrder;   // NOTE(review): no definition in WilsonFermion.cc -- confirm one exists elsewhere
+
+    protected:
+
+      double mass;   // enters the diagonal term as (4+mass)
+
+      GridBase *_grid;     // full grid
+      GridBase *_cbgrid;   // checkerboarded (half) grid
+
+      static const int npoint=8;                    // number of stencil points
+      static const std::vector<int> directions;
+      static const std::vector<int> displacements;
+
+      // Stencils for the full grid and for even / odd sources
+      CartesianStencil Stencil;
+      CartesianStencil StencilEven;
+      CartesianStencil StencilOdd;
+
+      // Copy of the gauge field, with even and odd subsets
+      LatticeDoubledGaugeField Umu;
+      LatticeDoubledGaugeField UmuEven;
+      LatticeDoubledGaugeField UmuOdd;
+
+      // Comms buffer shared by all stencils
+      std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > comm_buf;
+
+
+    };
+
+  }
+}
+#endif
diff --git a/lib/qcd/Grid_qcd_dhop.cc b/lib/qcd/action/fermion/WilsonKernels.cc
similarity index 87%
rename from lib/qcd/Grid_qcd_dhop.cc
rename to lib/qcd/action/fermion/WilsonKernels.cc
index 1e5dcd16..879bbb7a 100644
--- a/lib/qcd/Grid_qcd_dhop.cc
+++ b/lib/qcd/action/fermion/WilsonKernels.cc
@@ -4,8 +4,8 @@ namespace Grid {
 namespace QCD {
 
 void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			    int ss,const LatticeFermion &in, LatticeFermion &out)
+			std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
+			int sF,int sU,const LatticeFermion &in, LatticeFermion &out)
 {
     vHalfSpinColourVector  tmp;    
     vHalfSpinColourVector  chi;    
@@ -16,6 +16,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     //#define VERBOSE( A)  if ( ss<10 ) { std::cout << "site " <<ss << " " #A " neigh " << offset << " perm "<< perm <<std::endl;}    
 
     // Xp
+    int ss = sF;
     offset = st._offsets [Xp][ss];
     local  = st._is_local[Xp][ss];
     perm   = st._permute[Xp][ss];
@@ -29,7 +30,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Xp),&chi());
+    mult(&Uchi(),&U._odata[sU](Xp),&chi());
     spReconXp(result,Uchi);
 
     //    std::cout << "XP_RECON"<<std::endl;
@@ -51,7 +52,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Yp),&chi());
+    mult(&Uchi(),&U._odata[sU](Yp),&chi());
     accumReconYp(result,Uchi);
 
     // Zp
@@ -67,7 +68,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Zp),&chi());
+    mult(&Uchi(),&U._odata[sU](Zp),&chi());
     accumReconZp(result,Uchi);
 
     // Tp
@@ -83,7 +84,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Tp),&chi());
+    mult(&Uchi(),&U._odata[sU](Tp),&chi());
     accumReconTp(result,Uchi);
 
     // Xm
@@ -101,7 +102,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Xm),&chi());
+    mult(&Uchi(),&U._odata[sU](Xm),&chi());
     accumReconXm(result,Uchi);
     //  std::cout << "XM_RECON_ACCUM"<<std::endl;
     //    std::cout << result()(0)(0) <<" "<<result()(0)(1) <<" "<<result()(0)(2) <<std::endl;
@@ -124,7 +125,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Ym),&chi());
+    mult(&Uchi(),&U._odata[sU](Ym),&chi());
     accumReconYm(result,Uchi);
 
     // Zm
@@ -140,7 +141,7 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Zm),&chi());
+    mult(&Uchi(),&U._odata[sU](Zm),&chi());
     accumReconZm(result,Uchi);
 
     // Tm
@@ -156,15 +157,15 @@ void DiracOpt::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Tm),&chi());
+    mult(&Uchi(),&U._odata[sU](Tm),&chi());
     accumReconTm(result,Uchi);
 
-    vstream(out._odata[ss],result);
+    vstream(out._odata[ss],result*(-0.5));
 }
 
 void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			       std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			       int ss,const LatticeFermion &in, LatticeFermion &out)
+			   std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
+			   int sF,int sU,const LatticeFermion &in, LatticeFermion &out)
 {
     vHalfSpinColourVector  tmp;    
     vHalfSpinColourVector  chi;    
@@ -173,6 +174,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     int offset,local,perm, ptype;
 
     // Xp
+    int ss=sF;
     offset = st._offsets [Xm][ss];
     local  = st._is_local[Xm][ss];
     perm   = st._permute[Xm][ss];
@@ -186,7 +188,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Xm),&chi());
+    mult(&Uchi(),&U._odata[sU](Xm),&chi());
     spReconXp(result,Uchi);
 
     // Yp
@@ -202,7 +204,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Ym),&chi());
+    mult(&Uchi(),&U._odata[sU](Ym),&chi());
     accumReconYp(result,Uchi);
 
     // Zp
@@ -218,7 +220,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Zm),&chi());
+    mult(&Uchi(),&U._odata[sU](Zm),&chi());
     accumReconZp(result,Uchi);
 
     // Tp
@@ -234,7 +236,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Tm),&chi());
+    mult(&Uchi(),&U._odata[sU](Tm),&chi());
     accumReconTp(result,Uchi);
 
     // Xm
@@ -252,7 +254,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Xp),&chi());
+    mult(&Uchi(),&U._odata[sU](Xp),&chi());
     accumReconXm(result,Uchi);
 
     // Ym
@@ -269,7 +271,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Yp),&chi());
+    mult(&Uchi(),&U._odata[sU](Yp),&chi());
     accumReconYm(result,Uchi);
 
     // Zm
@@ -285,7 +287,7 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Zp),&chi());
+    mult(&Uchi(),&U._odata[sU](Zp),&chi());
     accumReconZm(result,Uchi);
 
     // Tm
@@ -301,9 +303,9 @@ void DiracOpt::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[ss](Tp),&chi());
+    mult(&Uchi(),&U._odata[sU](Tp),&chi());
     accumReconTm(result,Uchi);
 
-    vstream(out._odata[ss],result);
+    vstream(out._odata[ss],result*(-0.5));
 }
 }}
diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h
new file mode 100644
index 00000000..cc535022
--- /dev/null
+++ b/lib/qcd/action/fermion/WilsonKernels.h
@@ -0,0 +1,42 @@
+#ifndef  GRID_QCD_DHOP_H
+#define  GRID_QCD_DHOP_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // Helper classes that implement Wilson stencil for a single site.
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+    // Generic version works for any Nc and with extra flavour indices
+    class DiracOpt {
+    public:
+      // Per-site kernels. sF indexes the fermion field and the stencil tables, sU indexes
+      // the gauge field U; 4d Wilson passes the same value for both.
+      static void DhopSite   (CartesianStencil &st, LatticeDoubledGaugeField &U,
+                              std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
+                              int sF, int sU, const LatticeFermion &in, LatticeFermion &out);
+      static void DhopSiteDag(CartesianStencil &st, LatticeDoubledGaugeField &U,
+                              std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
+                              int sF, int sU, const LatticeFermion &in, LatticeFermion &out);
+      // These will need to be packaged intelligently: WilsonType base class for use by DWF etc.
+    };
+
+    // Hand unrolled for Nc=3, one flavour
+    class DiracOptHand {
+    public:
+      // Same per-site interface as DiracOpt. sF indexes the fermion field and the stencil
+      // tables, sU indexes the gauge field U.
+      static void DhopSite   (CartesianStencil &st, LatticeDoubledGaugeField &U,
+                              std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
+                              int sF, int sU, const LatticeFermion &in, LatticeFermion &out);
+      static void DhopSiteDag(CartesianStencil &st, LatticeDoubledGaugeField &U,
+                              std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> > &buf,
+                              int sF, int sU, const LatticeFermion &in, LatticeFermion &out);
+      // These will need to be packaged intelligently: WilsonType base class for use by DWF etc.
+    };
+
+  }
+}
+#endif
diff --git a/lib/qcd/Grid_qcd_dhop_hand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc
similarity index 92%
rename from lib/qcd/Grid_qcd_dhop_hand.cc
rename to lib/qcd/action/fermion/WilsonKernelsHand.cc
index f8d464fb..019b668b 100644
--- a/lib/qcd/Grid_qcd_dhop_hand.cc
+++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc
@@ -27,7 +27,7 @@
     Chi_12 = ref()(1)(2);
 
 #define MULT_2SPIN(A)\
-   auto & ref(U._odata[ss](A));	\
+   auto & ref(U._odata[sU](A));	\
     U_00 = ref()(0,0);\
     U_10 = ref()(1,0);\
     U_20 = ref()(2,0);\
@@ -282,7 +282,7 @@ namespace QCD {
 
 void DiracOptHand::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
 			    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			    int ss,const LatticeFermion &in, LatticeFermion &out)
+			    int sF,int sU,const LatticeFermion &in, LatticeFermion &out)
 {
   REGISTER vComplex result_00; // 12 regs on knc
   REGISTER vComplex result_01;
@@ -338,7 +338,8 @@ void DiracOptHand::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
 
 
   int offset,local,perm, ptype;
-
+  int ss=sF;
+  
   // Xp
   offset = st._offsets [Xp][ss];
   local  = st._is_local[Xp][ss];
@@ -514,24 +515,24 @@ void DiracOptHand::DhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
 
   {
     vSpinColourVector & ref (out._odata[ss]);
-    vstream(ref()(0)(0),result_00);
-    vstream(ref()(0)(1),result_01);
-    vstream(ref()(0)(2),result_02);
-    vstream(ref()(1)(0),result_10);
-    vstream(ref()(1)(1),result_11);
-    vstream(ref()(1)(2),result_12);
-    vstream(ref()(2)(0),result_20);
-    vstream(ref()(2)(1),result_21);
-    vstream(ref()(2)(2),result_22);
-    vstream(ref()(3)(0),result_30);
-    vstream(ref()(3)(1),result_31);
-    vstream(ref()(3)(2),result_32);
+    vstream(ref()(0)(0),result_00*(-0.5));
+    vstream(ref()(0)(1),result_01*(-0.5));
+    vstream(ref()(0)(2),result_02*(-0.5));
+    vstream(ref()(1)(0),result_10*(-0.5));
+    vstream(ref()(1)(1),result_11*(-0.5));
+    vstream(ref()(1)(2),result_12*(-0.5));
+    vstream(ref()(2)(0),result_20*(-0.5));
+    vstream(ref()(2)(1),result_21*(-0.5));
+    vstream(ref()(2)(2),result_22*(-0.5));
+    vstream(ref()(3)(0),result_30*(-0.5));
+    vstream(ref()(3)(1),result_31*(-0.5));
+    vstream(ref()(3)(2),result_32*(-0.5));
   }
 }
 
 void DiracOptHand::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
 			       std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			       int ss,const LatticeFermion &in, LatticeFermion &out)
+			       int ss,int sU,const LatticeFermion &in, LatticeFermion &out)
 {
   REGISTER vComplex result_00; // 12 regs on knc
   REGISTER vComplex result_01;
@@ -752,18 +753,18 @@ void DiracOptHand::DhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
 
   {
     vSpinColourVector & ref (out._odata[ss]);
-    vstream(ref()(0)(0),result_00);
-    vstream(ref()(0)(1),result_01);
-    vstream(ref()(0)(2),result_02);
-    vstream(ref()(1)(0),result_10);
-    vstream(ref()(1)(1),result_11);
-    vstream(ref()(1)(2),result_12);
-    vstream(ref()(2)(0),result_20);
-    vstream(ref()(2)(1),result_21);
-    vstream(ref()(2)(2),result_22);
-    vstream(ref()(3)(0),result_30);
-    vstream(ref()(3)(1),result_31);
-    vstream(ref()(3)(2),result_32);
+    vstream(ref()(0)(0),result_00*(-0.5));
+    vstream(ref()(0)(1),result_01*(-0.5));
+    vstream(ref()(0)(2),result_02*(-0.5));
+    vstream(ref()(1)(0),result_10*(-0.5));
+    vstream(ref()(1)(1),result_11*(-0.5));
+    vstream(ref()(1)(2),result_12*(-0.5));
+    vstream(ref()(2)(0),result_20*(-0.5));
+    vstream(ref()(2)(1),result_21*(-0.5));
+    vstream(ref()(2)(2),result_22*(-0.5));
+    vstream(ref()(3)(0),result_30*(-0.5));
+    vstream(ref()(3)(1),result_31*(-0.5));
+    vstream(ref()(3)(2),result_32*(-0.5));
   }
 }
 }}
diff --git a/lib/stencil/Grid_lebesgue.cc b/lib/stencil/Grid_lebesgue.cc
new file mode 100644
index 00000000..977e3562
--- /dev/null
+++ b/lib/stencil/Grid_lebesgue.cc
@@ -0,0 +1,103 @@
+#include <Grid.h>
+
+namespace Grid {
+
+int LebesgueOrder::UseLebesgueOrder; // storage for the static switch tested by LebesgueOrder::Reorder
+
+LebesgueOrder::IndexInteger LebesgueOrder::alignup(IndexInteger n){
+  // Round n up to a power of two (a power of two is returned unchanged).
+  // Subtract one, smear the highest set bit into every lower bit, add one:
+  //   1000 0011 --> 1000 0010 --> 1111 1111 --> 1 0000 0000
+  IndexInteger filled = n - 1;
+  for(int shift=1;shift<32;shift<<=1){   // shifts of 1,2,4,8,16 cover a 32 bit word
+    filled |= filled >> shift;
+  }
+  return filled + 1;
+}
+
+LebesgueOrder::LebesgueOrder(GridBase *grid)
+{
+  // Fill _LebesgueReorder: entry k is the lexicographic site index (dimension 0
+  // fastest, extents grid->_rdimensions) of the k-th site met on the Z-order walk.
+  _LebesgueReorder.resize(0);
+
+  // Align up dimensions to power of two.
+  const IndexInteger one=1;
+  IndexInteger ND = grid->_ndimension;
+  std::vector<IndexInteger> dims(ND);
+  std::vector<IndexInteger> adims(ND);
+  std::vector<std::vector<IndexInteger> > bitlist(ND);
+
+  for(IndexInteger mu=0;mu<ND;mu++){
+    dims[mu] = grid->_rdimensions[mu];
+    assert ( dims[mu] != 0 );
+    adims[mu] = alignup(dims[mu]);
+  }
+
+  // List which bits of padded volume coordinate contribute; this strategy
+  // i) avoids recursion
+  // ii) has loop lengths at most the width of a 32 bit word.
+  int sitebit=0;
+  int split=2;
+  for(IndexInteger mu=0;mu<ND;mu++){   // lowest "split" bits of each mu stay contiguous
+    for(int bit=0;bit<split;bit++){
+      IndexInteger mask = one<<bit;
+      if ( mask&(adims[mu]-1) ){
+        bitlist[mu].push_back(sitebit);
+        sitebit++;
+      }
+    }
+  }
+  for(int bit=split;bit<32;bit++){     // higher bits are interleaved across mu
+    IndexInteger mask = one<<bit;
+    for(IndexInteger mu=0;mu<ND;mu++){
+      if ( mask&(adims[mu]-1) ){
+        bitlist[mu].push_back(sitebit);
+        sitebit++;
+      }
+    }
+  }
+
+  // Work out padded and unpadded volumes
+  IndexInteger avol = 1;
+  for(IndexInteger mu=0;mu<ND;mu++) avol = avol * adims[mu];
+  IndexInteger vol = 1;
+  for(IndexInteger mu=0;mu<ND;mu++) vol = vol * dims[mu];
+
+  // Loop over padded volume, following Lebesgue curve
+  // We interleave the bits from sequential "mu".
+  std::vector<IndexInteger> ax(ND);
+  _LebesgueReorder.reserve(vol);   // the final assert expects exactly vol entries
+  for(IndexInteger asite=0;asite<avol;asite++){
+    // Start with zero and collect bits
+    for(IndexInteger mu=0;mu<ND;mu++) ax[mu] = 0;
+
+    int contained = 1;
+    for(IndexInteger mu=0;mu<ND;mu++){
+
+      // Build the coordinate on the aligned volume
+      for(IndexInteger bit=0;bit<bitlist[mu].size();bit++){
+        IndexInteger sbit=bitlist[mu][bit];
+
+        if(asite&(one<<sbit)){
+          ax[mu]|=one<<bit;
+        }
+      }
+
+      // Is it contained in original box
+      if ( ax[mu]>dims[mu]-1 ) contained = 0;
+
+    }
+
+    if ( contained ) {
+      // Lexicographic index over all ND dimensions (dimension 0 fastest), in
+      // Horner form so that no fixed dimensionality is assumed.
+      IndexInteger site = 0;
+      for(IndexInteger mu=ND;mu>0;mu--) site = site*dims[mu-1] + ax[mu-1];
+
+      _LebesgueReorder.push_back(site);
+    }
+  }
+  assert( _LebesgueReorder.size() == vol );
+}
+}
diff --git a/lib/stencil/Grid_lebesgue.h b/lib/stencil/Grid_lebesgue.h
new file mode 100644
index 00000000..1d59b127
--- /dev/null
+++ b/lib/stencil/Grid_lebesgue.h
@@ -0,0 +1,29 @@
+#ifndef GRID_LEBESGUE_H
+#define GRID_LEBESGUE_H
+
+#include<vector>
+
+// Lebesgue, Morton, Z-graph ordering assistance
+namespace Grid {
+  
+  // Maps a loop counter to a site index: identity, or a precomputed reorder table.
+  class LebesgueOrder {
+  public:
+    typedef uint32_t IndexInteger;
+
+    static int UseLebesgueOrder;        // non-zero: Reorder() goes through the table
+
+    LebesgueOrder(GridBase *grid);
+
+    IndexInteger alignup(IndexInteger n);
+
+    // Table entry for ss when reordering is enabled, otherwise ss itself.
+    inline IndexInteger Reorder(IndexInteger ss) {
+      if ( !UseLebesgueOrder ) return ss;
+      return _LebesgueReorder[ss];
+    }
+  private:
+    std::vector<IndexInteger> _LebesgueReorder;
+  };
+}
+#endif
diff --git a/lib/stencil/Grid_stencil_common.cc b/lib/stencil/Grid_stencil_common.cc
index 6cbcc890..f0f8c581 100644
--- a/lib/stencil/Grid_stencil_common.cc
+++ b/lib/stencil/Grid_stencil_common.cc
@@ -3,95 +3,6 @@
 namespace Grid {
 
 
-
-void CartesianStencil::LebesgueOrder(void) 
-{
-  _LebesgueReorder.resize(0);
-  
-  // Align up dimensions to power of two.
-  const StencilInteger one=1;
-  StencilInteger ND = _grid->_ndimension;
-  std::vector<StencilInteger> dims(ND);
-  std::vector<StencilInteger> adims(ND);
-  std::vector<std::vector<StencilInteger> > bitlist(ND);
-
-
-  for(StencilInteger mu=0;mu<ND;mu++){
-    dims[mu] = _grid->_rdimensions[mu];
-    assert ( dims[mu] != 0 );
-    adims[mu] = alignup(dims[mu]);
-  }
-
-  // List which bits of padded volume coordinate contribute; this strategy 
-  // i) avoids recursion 
-  // ii) has loop lengths at most the width of a 32 bit word.
-  int sitebit=0;
-  int split=24;
-  for(int mu=0;mu<ND;mu++){   // mu 0 takes bit 0; mu 1 takes bit 1 etc...
-    for(int bit=0;bit<split;bit++){
-    StencilInteger mask = one<<bit;
-      if ( mask&(adims[mu]-1) ){
-	bitlist[mu].push_back(sitebit);
-	sitebit++;
-      }
-    }
-  }
-  for(int bit=split;bit<32;bit++){
-    StencilInteger mask = one<<bit;
-    for(int mu=0;mu<ND;mu++){   // mu 0 takes bit 0; mu 1 takes bit 1 etc...
-      if ( mask&(adims[mu]-1) ){
-	bitlist[mu].push_back(sitebit);
-	sitebit++;
-      }
-    }
-  }
-
-  // Work out padded and unpadded volumes
-  StencilInteger avol = 1;
-  for(int mu=0;mu<ND;mu++) avol = avol * adims[mu];
-
-  StencilInteger vol = 1;
-  for(int mu=0;mu<ND;mu++) vol = vol * dims[mu];
-  
-  // Loop over padded volume, following Lebesgue curve
-  // We interleave the bits from sequential "mu".
-  std::vector<StencilInteger> ax(ND);
-  
-  for(StencilInteger asite=0;asite<avol;asite++){
-
-    // Start with zero and collect bits
-    for(int mu=0;mu<ND;mu++) ax[mu] = 0;
-
-    int contained = 1;
-    for(int mu=0;mu<ND;mu++){
-
-      // Build the coordinate on the aligned volume
-      for(int bit=0;bit<bitlist[mu].size();bit++){
-	int sbit=bitlist[mu][bit];
-
-	if(asite&(one<<sbit)){
-	  ax[mu]|=one<<bit;
-	}
-      }
-
-      // Is it contained in original box
-      if ( ax[mu]>dims[mu]-1 ) contained = 0;
-
-    }
-
-    if ( contained ) {
-      int site = ax[0]
-	+        dims[0]*ax[1]
-        +dims[0]*dims[1]*ax[2]
-        +dims[0]*dims[1]*dims[2]*ax[3];
-
-      _LebesgueReorder.push_back(site);
-    }
-  }
-
-  assert( _LebesgueReorder.size() == vol );
-}
-
   CartesianStencil::CartesianStencil(GridBase *grid,
 				     int npoints,
 				     int checkerboard,
@@ -110,8 +21,6 @@ void CartesianStencil::LebesgueOrder(void)
       _unified_buffer_size=0;
       _request_count =0;
 
-      LebesgueOrder();
-
       int osites  = _grid->oSites();
 
       for(int i=0;i<npoints;i++){
@@ -143,8 +52,8 @@ void CartesianStencil::LebesgueOrder(void)
 	// up a table containing the npoint "neighbours" and whether they 
 	// live in lattice or a comms buffer.
 	if ( !comm_dim ) {
-	  sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,0);
-	  sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
+	  sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Even);
+	  sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Odd);
 
 	  if ( sshift[0] == sshift[1] ) {
 	    Local(point,dimension,shift,0x3);
@@ -154,8 +63,8 @@ void CartesianStencil::LebesgueOrder(void)
 	  }
 	} else { // All permute extract done in comms phase prior to Stencil application
 	  //        So tables are the same whether comm_dim or splice_dim
-	  sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,0);
-	  sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,1);
+	  sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Even);
+	  sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Odd);
 	  if ( sshift[0] == sshift[1] ) {
 	    Comms(point,dimension,shift,0x3);
 	  } else {
@@ -185,7 +94,7 @@ void CartesianStencil::LebesgueOrder(void)
 	int o   = 0;
 	int bo  = x * _grid->_ostride[dimension];
 	
-	int cb= (cbmask==0x2)? 1 : 0;
+	int cb= (cbmask==0x2)? Odd : Even;
 	  
 	int sshift = _grid->CheckerBoardShift(_checkerboard,dimension,shift,cb);
 	int sx     = (x+sshift)%rd;
@@ -224,7 +133,7 @@ void CartesianStencil::LebesgueOrder(void)
       _comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
                                            // send to one or more remote nodes.
 
-      int cb= (cbmask==0x2)? 1 : 0;
+      int cb= (cbmask==0x2)? Odd : Even;
       int sshift= _grid->CheckerBoardShift(_checkerboard,dimension,shift,cb);
       
       for(int x=0;x<rd;x++){       
diff --git a/scripts/linecount b/scripts/linecount
index c0731b23..350abf1c 100755
--- a/scripts/linecount
+++ b/scripts/linecount
@@ -1,3 +1,3 @@
 #!/bin/sh 
 
-wc -l */*.h */*/*.h */*/*/*.h */*.cc */*/*.cc */*/*/*.cc
\ No newline at end of file
+wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc
\ No newline at end of file
diff --git a/tests/Grid_cshift_red_black.cc b/tests/Grid_cshift_red_black.cc
new file mode 100644
index 00000000..212e9b6e
--- /dev/null
+++ b/tests/Grid_cshift_red_black.cc
@@ -0,0 +1,165 @@
+#include <Grid.h>
+#include <parallelIO/GridNerscIO.h>
+
+using namespace Grid;
+using namespace Grid::QCD;
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+  // Test: full-grid and recombined even/odd Cshift results are each checked site by site.
+  std::vector<int> latt_size   = GridDefaultLatt();
+  int Nd = latt_size.size();
+  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
+  std::vector<int> mpi_layout  = GridDefaultMpi();
+  assert(Nd==4); // the site loops and lexicographic index below are written for 4 dimensions
+  std::vector<int> mask(Nd,1);
+  mask[0]=0;
+
+  GridCartesian         Fine  (latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian RBFine(latt_size,simd_layout,mpi_layout,mask,1);
+
+  GridParallelRNG      FineRNG(&Fine);  FineRNG.SeedRandomDevice();
+
+  LatticeComplex U(&Fine);
+  LatticeComplex ShiftU(&Fine);
+  LatticeComplex rbShiftU(&Fine);
+  LatticeComplex Ue(&RBFine);
+  LatticeComplex Uo(&RBFine);
+  LatticeComplex ShiftUe(&RBFine);
+  LatticeComplex ShiftUo(&RBFine);
+  LatticeComplex lex(&Fine);
+  lex=zero;
+  Integer stride =1;
+  {
+    // Accumulate lex = sum_d coor[d]*stride[d], i.e. each site's lexicographic index
+    LatticeComplex coor(&Fine);
+
+    for(int d=0;d<Nd;d++){
+      //      Integer i=10000;
+      Integer i=0;
+      LatticeCoordinate(coor,d);
+      lex = lex + coor*stride+i;
+      stride=stride*latt_size[d];
+    }
+    U=lex;
+  }
+
+  pickCheckerboard(Even,Ue,U);
+  pickCheckerboard(Odd,Uo,U);
+
+  //  std::cout << U<<std::endl;
+  std::cout << "Ue " <<norm2(Ue)<<std::endl;
+  std::cout << "Uo " <<norm2(Uo)<<std::endl;
+
+
+  TComplex cm;
+  for(int dir=0;dir<Nd;dir++){
+    if ( dir!=1 ) continue;    // only direction 1 is exercised
+    for(int shift=0;shift<latt_size[dir];shift++){
+
+        std::cout<<"Shifting by "<<shift<<" in direction"<<dir<<std::endl;
+
+        //      std::cout<<"Even grid"<<std::endl;
+        ShiftUe = Cshift(Ue,dir,shift);    // Shift everything cb by cb
+        //      std::cout << "\tShiftUe " <<norm2(ShiftUe)<<std::endl;
+
+        //      std::cout<<"Odd grid"<<std::endl;
+        ShiftUo = Cshift(Uo,dir,shift);
+        //      std::cout << "\tShiftUo " <<norm2(ShiftUo)<<std::endl;
+
+        //      std::cout<<"Recombined Even/Odd grids"<<std::endl;
+        setCheckerboard(rbShiftU,ShiftUe);
+        setCheckerboard(rbShiftU,ShiftUo);
+        //      std::cout << "\trbShiftU " <<norm2(rbShiftU)<<std::endl;
+
+        //      std::cout<<"Full grid shift"<<std::endl;
+        ShiftU  = Cshift(U,dir,shift);    // Shift everything
+        //      std::cout << "\tShiftU " <<norm2(rbShiftU)<<std::endl;
+
+        std::vector<int> coor(4);
+
+        std::cout << "Checking the non-checkerboard shift"<<std::endl;
+        for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
+        for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
+        for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
+        for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
+
+          peekSite(cm,ShiftU,coor);
+
+
+
+          std::vector<int> scoor(coor);
+          scoor[dir] = (scoor[dir]+shift)%latt_size[dir];
+
+          Integer slex = scoor[0]
+            + latt_size[0]*scoor[1]
+            + latt_size[0]*latt_size[1]*scoor[2]
+            + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
+
+          Complex scm(slex);
+
+          double nrm = abs(scm-cm()()());
+          std::vector<int> peer(4);
+          int index=real(cm);
+          Fine.CoorFromIndex(peer,index,latt_size);
+
+          if (nrm > 0){
+            std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
+                     <<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
+                     << cm()()()<<" expect "<<scm<<"  "<<nrm<<std::endl;
+            std::cerr<<"Got    "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
+            index=real(scm);
+            Fine.CoorFromIndex(peer,index,latt_size);
+            std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
+            exit(-1);
+          }
+        }}}}
+
+
+        std::cout << "Checking the checkerboard shift"<<std::endl;
+        for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
+        for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
+        for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
+        for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
+
+          peekSite(cm,rbShiftU,coor);
+
+
+
+          std::vector<int> scoor(coor);
+          scoor[dir] = (scoor[dir]+shift)%latt_size[dir];
+
+          Integer slex = scoor[0]
+            + latt_size[0]*scoor[1]
+            + latt_size[0]*latt_size[1]*scoor[2]
+            + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
+
+          Complex scm(slex);
+
+          double nrm = abs(scm-cm()()());
+          std::vector<int> peer(4);
+          int index=real(cm);
+          Fine.CoorFromIndex(peer,index,latt_size);
+
+          if (nrm > 0){
+            std::cerr<<"FAIL shift "<< shift<<" in dir "<< dir
+                     <<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
+                     << cm()()()<<" expect "<<scm<<"  "<<nrm<<std::endl;
+            std::cerr<<"Got    "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
+            index=real(scm);
+            Fine.CoorFromIndex(peer,index,latt_size);
+            std::cerr<<"Expect "<<index<<" " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
+            exit(-1);
+          } else if (0) {
+            std::cout<<"PASS shift "<< shift<<" in dir "<< dir
+                     <<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "
+                     << cm()()()<<" expect "<<scm<<"  "<<nrm<<std::endl;
+          }
+        }}}}
+
+    }
+  }
+
+  Grid_finalize();
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 3c79191c..7ef4eb1f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 #
 # Test code
 #
-bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma  Grid_simd Grid_rng Grid_remez Grid_rng_fixed 
+bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma  Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_cshift_red_black 
 
 Grid_main_SOURCES = Grid_main.cc
 Grid_main_LDADD = -lGrid
@@ -25,6 +25,9 @@ Grid_nersc_io_LDADD = -lGrid
 Grid_cshift_SOURCES = Grid_cshift.cc
 Grid_cshift_LDADD = -lGrid
 
+Grid_cshift_red_black_SOURCES = Grid_cshift_red_black.cc
+Grid_cshift_red_black_LDADD = -lGrid
+
 Grid_gamma_SOURCES = Grid_gamma.cc
 Grid_gamma_LDADD = -lGrid
 

From 8272e15bd65e439864b91877d69641c1fb5a1062 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Sun, 31 May 2015 15:11:09 +0100
Subject: [PATCH 03/20] Updated line counter

---
 scripts/linecount | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/linecount b/scripts/linecount
index 350abf1c..7624c15f 100755
--- a/scripts/linecount
+++ b/scripts/linecount
@@ -1,3 +1,3 @@
 #!/bin/sh 
 
-wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc
\ No newline at end of file
+wc -l lib/*.h lib/*/*.h lib/*/*/*.h lib/*.cc lib/*/*.cc lib/*/*/*.cc tests/*.cc benchmarks/*.cc lib/*/*/*/*.cc lib/*/*/*/*.h
\ No newline at end of file

From ce8c7a77b60ffa2d418097663125fc61163bf018 Mon Sep 17 00:00:00 2001
From: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Date: Sun, 31 May 2015 18:50:08 +0100
Subject: [PATCH 04/20] Bug in Makefile.am fixed

---
 lib/Makefile.am | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/Makefile.am b/lib/Makefile.am
index aa531df7..e983b432 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -103,7 +103,6 @@ nobase_include_HEADERS=\
 		./qcd/Dirac.h\
 		./qcd/QCD.h\
 		./qcd/TwoSpinor.h\
-		./qcd/FermionAction.h\
 		./simd/Grid_avx.h\
 		./simd/Grid_avx512.h\
 		./simd/Grid_qpx.h\

From 9ea64767b05bc4c468da089e2c49bbc6813c2fa5 Mon Sep 17 00:00:00 2001
From: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Date: Sun, 31 May 2015 22:50:03 +0100
Subject: [PATCH 05/20] No compile fix on mpi target

---
 lib/cshift/Grid_cshift_mpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/cshift/Grid_cshift_mpi.h b/lib/cshift/Grid_cshift_mpi.h
index 8c0badcd..3b3fad29 100644
--- a/lib/cshift/Grid_cshift_mpi.h
+++ b/lib/cshift/Grid_cshift_mpi.h
@@ -17,7 +17,7 @@ template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int s
   // Map to always positive shift modulo global full dimension.
   shift = (shift+fd)%fd;
 
-  ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift);
+  ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);
         
   // the permute type
   int simd_layout     = rhs._grid->_simd_layout[dimension];

From 4c617c36430977fecec6366104e3d3b27d69caf0 Mon Sep 17 00:00:00 2001
From: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Date: Mon, 1 Jun 2015 12:25:59 +0100
Subject: [PATCH 06/20] Const safety

---
 lib/cshift/Grid_cshift_common.h |  8 ++++----
 lib/cshift/Grid_cshift_mpi.h    | 10 +++++-----
 lib/cshift/Grid_cshift_none.h   |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/cshift/Grid_cshift_common.h b/lib/cshift/Grid_cshift_common.h
index 06e812d9..ffa3f771 100644
--- a/lib/cshift/Grid_cshift_common.h
+++ b/lib/cshift/Grid_cshift_common.h
@@ -153,7 +153,7 @@ PARALLEL_NESTED_LOOP2
 //////////////////////////////////////////////////////
 // local to node block strided copies
 //////////////////////////////////////////////////////
-template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
+template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
 {
   int rd = rhs._grid->_rdimensions[dimension];
 
@@ -182,7 +182,7 @@ PARALLEL_NESTED_LOOP2
   
 }
 
-template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
+template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
 {
  
   int rd = rhs._grid->_rdimensions[dimension];
@@ -212,7 +212,7 @@ PARALLEL_NESTED_LOOP2
 //////////////////////////////////////////////////////
 // Local to node Cshift
 //////////////////////////////////////////////////////
-template<class vobj> void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
+template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
 {
   int sshift[2];
 
@@ -227,7 +227,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int
   }
 }
 
-template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 {
   GridBase *grid = rhs._grid;
   int fd = grid->_fdimensions[dimension];
diff --git a/lib/cshift/Grid_cshift_mpi.h b/lib/cshift/Grid_cshift_mpi.h
index 3b3fad29..9cdfd316 100644
--- a/lib/cshift/Grid_cshift_mpi.h
+++ b/lib/cshift/Grid_cshift_mpi.h
@@ -4,7 +4,7 @@
 
 namespace Grid { 
 
-template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
+template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
 {
   typedef typename vobj::vector_type vector_type;
   typedef typename vobj::scalar_type scalar_type;
@@ -35,7 +35,7 @@ template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int s
   return ret;
 }
 
-template<class vobj> void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
+template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
 {
   int sshift[2];
 
@@ -50,7 +50,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int
   }
 }
 
-template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
+template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
 {
   int sshift[2];
 
@@ -65,7 +65,7 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rh
   }
 }
 
-template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 {
   typedef typename vobj::vector_type vector_type;
   typedef typename vobj::scalar_type scalar_type;
@@ -124,7 +124,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int
   }
 }
 
-template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 {
   GridBase *grid=rhs._grid;
   const int Nsimd = grid->Nsimd();
diff --git a/lib/cshift/Grid_cshift_none.h b/lib/cshift/Grid_cshift_none.h
index be9467b1..a8c6f6c0 100644
--- a/lib/cshift/Grid_cshift_none.h
+++ b/lib/cshift/Grid_cshift_none.h
@@ -1,7 +1,7 @@
 #ifndef _GRID_CSHIFT_NONE_H_
 #define _GRID_CSHIFT_NONE_H_
 namespace Grid {
-template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
+template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
 {
   Lattice<vobj> ret(rhs._grid);
   ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension);

From 8bd9fb4427350dcf900825da19a630ba6e89ad46 Mon Sep 17 00:00:00 2001
From: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Date: Mon, 1 Jun 2015 12:26:20 +0100
Subject: [PATCH 07/20] Fix mistake

---
 lib/lattice/Grid_lattice_base.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/lattice/Grid_lattice_base.h b/lib/lattice/Grid_lattice_base.h
index 4a6d3180..15ef0944 100644
--- a/lib/lattice/Grid_lattice_base.h
+++ b/lib/lattice/Grid_lattice_base.h
@@ -176,7 +176,7 @@ PARALLEL_FOR_LOOP
 PARALLEL_FOR_LOOP
     for(int ss=0;ss<_grid->oSites();ss++){
 #ifdef STREAMING_STORES
-      vobj tmp = eval(tmp,ss,expr);
+      vobj tmp = eval(ss,expr);
       vstream(_odata[ss] ,tmp);
 #else
       _odata[ss]=eval(ss,expr);

From 2583570e17e95591d0f9dac4bddf18d3c1f6260b Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 2 Jun 2015 16:57:12 +0100
Subject: [PATCH 08/20] Domain wall fermions now invert ; have the basis set up
 for Tanh/Zolo * (Cayley/PartFrac/ContFrac) * (Mobius/Shamir/Wilson) Approx   
     Representation               Kernel.

All are done with space-time taking part in checkerboarding, Ls uncheckerboarded

Have only so far tested the Domain Wall limit of mobius, and at that only checked
that it
i)  Inverts
ii) 5dim DW == Ls copies of 4dim D2
iii) MeeInv Mee == 1
iv) Meo+Mee+Moe+Moo == M unprec.
v) MpcDagMpc is Hermitian
vi) Mdag is the adjoint of M between stochastic vectors.

That said, the RB schur solve, RB MpcDagMpc solve, Unprec solve
all converge and the true residual becomes small; so pretty good tests.
---
 benchmarks/Grid_dwf.cc                        |  79 +++---
 benchmarks/Grid_dwf_cg_prec.cc                |  58 +++++
 benchmarks/Grid_dwf_cg_schur.cc               |  53 ++++
 benchmarks/Grid_dwf_cg_unprec.cc              |  53 ++++
 benchmarks/Grid_dwf_even_odd.cc               | 207 +++++++++++++++
 benchmarks/Grid_wilson_evenodd.cc             |   5 +-
 benchmarks/Makefile.am                        |  46 +++-
 lib/Grid_init.cc                              |   2 +-
 lib/Makefile.am                               |  16 +-
 lib/algorithms/LinearOperator.h               |  32 ---
 lib/algorithms/approx/Zolotarev.cc            |   9 +-
 lib/algorithms/approx/Zolotarev.h             |   9 +-
 lib/cartesian/Grid_cartesian_base.h           |   2 +-
 lib/cartesian/Grid_cartesian_full.h           |   6 +-
 lib/cartesian/Grid_cartesian_red_black.h      |  24 +-
 lib/communicator/Grid_communicator_base.h     |   2 +-
 lib/communicator/Grid_communicator_mpi.cc     |   2 +-
 lib/communicator/Grid_communicator_none.cc    |   2 +-
 lib/qcd/LinalgUtils.h                         | 113 +++++++++
 lib/qcd/QCD.h                                 |   2 +
 lib/qcd/SpaceTimeGrid.cc                      |  52 ++++
 lib/qcd/SpaceTimeGrid.h                       |  18 ++
 lib/qcd/action/Actions.h                      |  78 +++++-
 lib/qcd/action/fermion/CayleyFermion5D.cc     | 235 ++++++++++++++++++
 lib/qcd/action/fermion/CayleyFermion5D.h      |  61 +++++
 .../fermion/ContinuedFractionFermion5D.cc     | 119 +++++++++
 .../fermion/ContinuedFractionFermion5D.h      |  53 ++++
 lib/qcd/action/fermion/DomainWallFermion.h    | 118 +++++++++
 .../{FermionAction.h => FermionOperator.h}    |   7 +-
 .../fermion/PartialFractionFermion5D.cc       |  47 ++++
 .../action/fermion/PartialFractionFermion5D.h |  49 ++++
 lib/qcd/action/fermion/WilsonFermion.cc       |   6 +-
 lib/qcd/action/fermion/WilsonFermion.h        |   6 +-
 ...DimWilsonFermion.cc => WilsonFermion5D.cc} | 106 +++-----
 ...veDimWilsonFermion.h => WilsonFermion5D.h} |  31 +--
 lib/stencil/Grid_stencil_common.cc            |  12 +-
 36 files changed, 1500 insertions(+), 220 deletions(-)
 create mode 100644 benchmarks/Grid_dwf_cg_prec.cc
 create mode 100644 benchmarks/Grid_dwf_cg_schur.cc
 create mode 100644 benchmarks/Grid_dwf_cg_unprec.cc
 create mode 100644 benchmarks/Grid_dwf_even_odd.cc
 create mode 100644 lib/qcd/LinalgUtils.h
 create mode 100644 lib/qcd/SpaceTimeGrid.cc
 create mode 100644 lib/qcd/SpaceTimeGrid.h
 create mode 100644 lib/qcd/action/fermion/CayleyFermion5D.cc
 create mode 100644 lib/qcd/action/fermion/CayleyFermion5D.h
 create mode 100644 lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
 create mode 100644 lib/qcd/action/fermion/ContinuedFractionFermion5D.h
 create mode 100644 lib/qcd/action/fermion/DomainWallFermion.h
 rename lib/qcd/action/fermion/{FermionAction.h => FermionOperator.h} (92%)
 create mode 100644 lib/qcd/action/fermion/PartialFractionFermion5D.cc
 create mode 100644 lib/qcd/action/fermion/PartialFractionFermion5D.h
 rename lib/qcd/action/fermion/{FiveDimWilsonFermion.cc => WilsonFermion5D.cc} (68%)
 rename lib/qcd/action/fermion/{FiveDimWilsonFermion.h => WilsonFermion5D.h} (72%)

diff --git a/benchmarks/Grid_dwf.cc b/benchmarks/Grid_dwf.cc
index eb1d9299..62869b17 100644
--- a/benchmarks/Grid_dwf.cc
+++ b/benchmarks/Grid_dwf.cc
@@ -24,43 +24,28 @@ int main (int argc, char ** argv)
   std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 
   std::vector<int> latt4 = GridDefaultLatt();
-  std::vector<int> simd4 = GridDefaultSimd(Nd,vComplexF::Nsimd());
-  std::vector<int> mpi4  = GridDefaultMpi();
-
-  assert(latt4.size()==4 ); 
-  assert(simd4.size()==4 );
-  assert(mpi4.size() ==4 );
-
-  const int Ls=1;
-  std::vector<int> latt5({Ls,latt4[0],latt4[1],latt4[2],latt4[3]});
-  std::vector<int> simd5({1 ,simd4[0],simd4[1],simd4[2],simd4[3]}); 
-  std::vector<int>  mpi5({1 , mpi4[0], mpi4[1], mpi4[2], mpi4[3]}); 
-  std::vector<int>   cb5({0,1,1,1,1}); // Checkerboard 4d only
-  int                cbd=1;            // use dim-1 to reduce
-
-  // Four dim grid for gauge field U
-  GridCartesian               UGrid(latt4,simd4,mpi4); 
-  GridRedBlackCartesian     UrbGrid(&UGrid);
-
-  // Five dim grid for fermions F
-  GridCartesian               FGrid(latt5,simd5,mpi5); 
-  GridRedBlackCartesian     FrbGrid(latt5,simd5,mpi5,cb5,cbd); 
+  const int Ls=8;
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 
   std::vector<int> seeds4({1,2,3,4});
   std::vector<int> seeds5({5,6,7,8});
 
-  GridParallelRNG          RNG5(&FGrid);  RNG5.SeedFixedIntegers(seeds5);
-  LatticeFermion src   (&FGrid); random(RNG5,src);
-  LatticeFermion result(&FGrid); result=zero;
-  LatticeFermion    ref(&FGrid);    ref=zero;
-  LatticeFermion    tmp(&FGrid);
-  LatticeFermion    err(&FGrid);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+
+  LatticeFermion src   (FGrid); random(RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeFermion    ref(FGrid);    ref=zero;
+  LatticeFermion    tmp(FGrid);
+  LatticeFermion    err(FGrid);
 
   ColourMatrix cm = Complex(1.0,0.0);
 
-  GridParallelRNG          RNG4(&UGrid);  RNG4.SeedFixedIntegers(seeds4);
-  LatticeGaugeField Umu(&UGrid); random(RNG4,Umu);
-  LatticeGaugeField Umu5d(&FGrid); 
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+  LatticeGaugeField Umu5d(FGrid); 
 
   // replicate across fifth dimension
   for(int ss=0;ss<Umu._grid->oSites();ss++){
@@ -72,7 +57,7 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   // Naive wilson implementation
   ////////////////////////////////////
-  std::vector<LatticeColourMatrix> U(4,&FGrid);
+  std::vector<LatticeColourMatrix> U(4,FGrid);
   for(int mu=0;mu<Nd;mu++){
     U[mu] = peekIndex<LorentzIndex>(Umu5d,mu);
   }
@@ -93,17 +78,17 @@ int main (int argc, char ** argv)
   }
 
   RealD mass=0.1;
-  FiveDimWilsonFermion Dw(Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass);
+  RealD M5  =1.8;
+  DomainWallFermion Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
   
   std::cout << "Calling Dw"<<std::endl;
-  int ncall=1000;
+  int ncall=10;
   double t0=usecond();
   for(int i=0;i<ncall;i++){
     Dw.Dhop(src,result,0);
   }
   double t1=usecond();
 
-
   double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
   double flops=1344*volume*ncall;
   
@@ -141,5 +126,31 @@ int main (int argc, char ** argv)
   err = ref-result; 
   std::cout << "norm diff   "<< norm2(err)<<std::endl;
 
+  LatticeFermion src_e (FrbGrid);
+  LatticeFermion src_o (FrbGrid);
+  LatticeFermion r_e   (FrbGrid);
+  LatticeFermion r_o   (FrbGrid);
+  LatticeFermion r_eo  (FGrid);
+
+
+  std::cout << "Calling Deo and Doe"<<std::endl;
+  pickCheckerboard(Even,src_e,src);
+  pickCheckerboard(Odd,src_o,src);
+
+  Dw.DhopEO(src_o,r_e,DaggerNo);
+  Dw.DhopOE(src_e,r_o,DaggerNo);
+  Dw.Dhop(src,result,DaggerNo);
+
+  setCheckerboard(r_eo,r_o);
+  setCheckerboard(r_eo,r_e);
+
+  err = r_eo-result; 
+  std::cout << "norm diff   "<< norm2(err)<<std::endl;
+
+  pickCheckerboard(Even,src_e,err);
+  pickCheckerboard(Odd,src_o,err);
+  std::cout << "norm diff even  "<< norm2(src_e)<<std::endl;
+  std::cout << "norm diff odd   "<< norm2(src_o)<<std::endl;
+
   Grid_finalize();
 }
diff --git a/benchmarks/Grid_dwf_cg_prec.cc b/benchmarks/Grid_dwf_cg_prec.cc
new file mode 100644
index 00000000..0cf86a19
--- /dev/null
+++ b/benchmarks/Grid_dwf_cg_prec.cc
@@ -0,0 +1,58 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  const int Ls=8;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeFermion    src(FGrid); random(RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = peekIndex<LorentzIndex>(Umu,mu);
+  }
+  
+  RealD mass=0.1;
+  RealD M5=1.8;
+  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+  LatticeFermion    src_o(FrbGrid);
+  LatticeFermion result_o(FrbGrid);
+  pickCheckerboard(Odd,src_o,src);
+  result_o=zero;
+
+  HermitianCheckerBoardedOperator<DomainWallFermion,LatticeFermion> HermOpEO(Ddwf);
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  CG(HermOpEO,src_o,result_o);
+
+  Grid_finalize();
+}
diff --git a/benchmarks/Grid_dwf_cg_schur.cc b/benchmarks/Grid_dwf_cg_schur.cc
new file mode 100644
index 00000000..aac4d3fd
--- /dev/null
+++ b/benchmarks/Grid_dwf_cg_schur.cc
@@ -0,0 +1,53 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  const int Ls=8;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeFermion    src(FGrid); random(RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = peekIndex<LorentzIndex>(Umu,mu);
+  }
+  
+  RealD mass=0.1;
+  RealD M5=1.8;
+  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  SchurRedBlackSolve<LatticeFermion> SchurSolver(CG);
+  SchurSolver(Ddwf,src,result);
+
+  Grid_finalize();
+}
diff --git a/benchmarks/Grid_dwf_cg_unprec.cc b/benchmarks/Grid_dwf_cg_unprec.cc
new file mode 100644
index 00000000..5c9e7ad3
--- /dev/null
+++ b/benchmarks/Grid_dwf_cg_unprec.cc
@@ -0,0 +1,53 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  const int Ls=8;
+
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeFermion    src(FGrid); random(RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+  for(int mu=0;mu<Nd;mu++){
+    U[mu] = peekIndex<LorentzIndex>(Umu,mu);
+  }
+  
+  RealD mass=0.1;
+  RealD M5=1.8;
+  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+  HermitianOperator<DomainWallFermion,LatticeFermion> HermOp(Ddwf);
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  CG(HermOp,src,result);
+
+  Grid_finalize();
+}
diff --git a/benchmarks/Grid_dwf_even_odd.cc b/benchmarks/Grid_dwf_even_odd.cc
new file mode 100644
index 00000000..ac47bbf9
--- /dev/null
+++ b/benchmarks/Grid_dwf_even_odd.cc
@@ -0,0 +1,207 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+
+  const int Ls=8;
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+
+  LatticeFermion src   (FGrid); random(RNG5,src);
+  LatticeFermion phi   (FGrid); random(RNG5,phi);
+  LatticeFermion chi   (FGrid); random(RNG5,chi);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeFermion    ref(FGrid);    ref=zero;
+  LatticeFermion    tmp(FGrid);    tmp=zero;
+  LatticeFermion    err(FGrid);    err=zero; // was a duplicated tmp=zero
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+
+  // Only one non-zero link direction: index 0 is kept, all nn>0 are zeroed
+  Umu=zero;
+  for(int nn=0;nn<Nd;nn++){
+    random(RNG4,U[nn]);
+    if ( nn>0 ) 
+      U[nn]=zero;
+    pokeIndex<LorentzIndex>(Umu,U[nn],nn);
+  }
+
+  RealD mass=0.1;
+  RealD M5  =1.8;
+  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+  LatticeFermion src_e (FrbGrid);
+  LatticeFermion src_o (FrbGrid);
+  LatticeFermion r_e   (FrbGrid);
+  LatticeFermion r_o   (FrbGrid);
+  LatticeFermion r_eo  (FGrid);
+  LatticeFermion r_eeoo(FGrid);
+
+  std::cout<<"=========================================================="<<std::endl;
+  std::cout<<"= Testing that Meo + Moe + Moo + Mee = Munprec "<<std::endl;
+  std::cout<<"=========================================================="<<std::endl;
+
+  pickCheckerboard(Even,src_e,src);
+  pickCheckerboard(Odd,src_o,src);
+
+  Ddwf.Meooe(src_e,r_o);  std::cout<<"Applied Meo"<<std::endl;
+  Ddwf.Meooe(src_o,r_e);  std::cout<<"Applied Moe"<<std::endl;
+  setCheckerboard(r_eo,r_o);
+  setCheckerboard(r_eo,r_e);
+
+  Ddwf.Mooee(src_e,r_e);  std::cout<<"Applied Mee"<<std::endl;
+  Ddwf.Mooee(src_o,r_o);  std::cout<<"Applied Moo"<<std::endl;
+  setCheckerboard(r_eeoo,r_e);
+  setCheckerboard(r_eeoo,r_o);
+
+  r_eo=r_eo+r_eeoo;
+  Ddwf.M(src,ref);  
+
+  //  std::cout << r_eo<<std::endl;
+  //  std::cout << ref <<std::endl;
+
+  err= ref - r_eo;
+  std::cout << "EO norm diff   "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
+    
+  LatticeComplex cerr(FGrid);
+  cerr = localInnerProduct(err,err);
+  //  std::cout << cerr<<std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test Ddagger is the dagger of D by requiring                "<<std::endl;
+  std::cout<<"=  < phi | Deo | chi > * = < chi | Deo^dag| phi>  "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  
+  LatticeFermion chi_e   (FrbGrid);
+  LatticeFermion chi_o   (FrbGrid);
+
+  LatticeFermion dchi_e  (FrbGrid);
+  LatticeFermion dchi_o  (FrbGrid);
+
+  LatticeFermion phi_e   (FrbGrid);
+  LatticeFermion phi_o   (FrbGrid);
+
+  LatticeFermion dphi_e  (FrbGrid);
+  LatticeFermion dphi_o  (FrbGrid);
+
+
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+  pickCheckerboard(Even,phi_e,phi);
+  pickCheckerboard(Odd ,phi_o,phi);
+
+  Ddwf.Meooe(chi_e,dchi_o);
+  Ddwf.Meooe(chi_o,dchi_e);
+  Ddwf.MeooeDag(phi_e,dphi_o);
+  Ddwf.MeooeDag(phi_o,dphi_e);
+
+  ComplexD pDce = innerProduct(phi_e,dchi_e);
+  ComplexD pDco = innerProduct(phi_o,dchi_o);
+  ComplexD cDpe = innerProduct(chi_e,dphi_e);
+  ComplexD cDpo = innerProduct(chi_o,dphi_o);
+
+  std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
+  std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
+
+  std::cout <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
+  std::cout <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MeeInv Mee = 1                                         "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+
+  Ddwf.Mooee(chi_e,src_e);
+  Ddwf.MooeeInv(src_e,phi_e);
+
+  Ddwf.Mooee(chi_o,src_o);
+  Ddwf.MooeeInv(src_o,phi_o);
+  
+  setCheckerboard(phi,phi_e);
+  setCheckerboard(phi,phi_o);
+
+  err = phi-chi;
+  std::cout << "norm diff   "<< norm2(err)<< std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MeeInvDag MeeDag = 1                                   "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+
+  Ddwf.MooeeDag(chi_e,src_e);
+  Ddwf.MooeeInvDag(src_e,phi_e);
+
+  Ddwf.MooeeDag(chi_o,src_o);
+  Ddwf.MooeeInvDag(src_o,phi_o);
+  
+  setCheckerboard(phi,phi_e);
+  setCheckerboard(phi,phi_o);
+
+  err = phi-chi;
+  std::cout << "norm diff   "<< norm2(err)<< std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MpcDagMpc is Hermitian              "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  
+  random(RNG5,phi);
+  random(RNG5,chi);
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+  pickCheckerboard(Even,phi_e,phi);
+  pickCheckerboard(Odd ,phi_o,phi);
+  RealD t1,t2;
+
+  Ddwf.MpcDagMpc(chi_e,dchi_e,t1,t2);
+  Ddwf.MpcDagMpc(chi_o,dchi_o,t1,t2);
+
+  Ddwf.MpcDagMpc(phi_e,dphi_e,t1,t2);
+  Ddwf.MpcDagMpc(phi_o,dphi_o,t1,t2);
+
+  pDce = innerProduct(phi_e,dchi_e);
+  pDco = innerProduct(phi_o,dchi_o);
+  cDpe = innerProduct(chi_e,dphi_e);
+  cDpo = innerProduct(chi_o,dphi_o);
+
+  std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
+  std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
+
+  std::cout <<"pDco - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
+  std::cout <<"pDce - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
+  
+  Grid_finalize();
+}
diff --git a/benchmarks/Grid_wilson_evenodd.cc b/benchmarks/Grid_wilson_evenodd.cc
index a073139d..3ebc4709 100644
--- a/benchmarks/Grid_wilson_evenodd.cc
+++ b/benchmarks/Grid_wilson_evenodd.cc
@@ -68,8 +68,6 @@ int main (int argc, char ** argv)
   LatticeFermion r_o   (&RBGrid);
   LatticeFermion r_eo  (&Grid);
 
-  const int Even=0;
-  const int Odd=1;
   std::cout<<"=========================================================="<<std::endl;
   std::cout<<"= Testing that Deo + Doe = Dunprec "<<std::endl;
   std::cout<<"=========================================================="<<std::endl;
@@ -79,12 +77,11 @@ int main (int argc, char ** argv)
 
   Dw.Meooe(src_e,r_o);  std::cout<<"Applied Meo"<<std::endl;
   Dw.Meooe(src_o,r_e);  std::cout<<"Applied Moe"<<std::endl;
-  Dw.Dhop (src,ref,0);
+  Dw.Dhop (src,ref,DaggerNo);
 
   setCheckerboard(r_eo,r_o);
   setCheckerboard(r_eo,r_e);
 
-  ref = (-0.5)*ref;
   err= ref - r_eo;
   std::cout << "EO norm diff   "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
 
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index b2649669..e92f2e84 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -5,14 +5,34 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 #
 # Test code
 #
-bin_PROGRAMS = Grid_wilson Grid_comms Grid_memory_bandwidth Grid_su3 Grid_wilson_cg_unprec Grid_wilson_evenodd  Grid_wilson_cg_prec Grid_wilson_cg_schur Grid_dwf
+bin_PROGRAMS = \
+	Grid_comms \
+	Grid_memory_bandwidth \
+	Grid_su3 \
+	Grid_wilson \
+	Grid_wilson_evenodd \
+	Grid_wilson_cg_unprec \
+	Grid_wilson_cg_prec \
+	Grid_wilson_cg_schur \
+	Grid_dwf\
+	Grid_dwf_even_odd\
+	Grid_dwf_cg_unprec\
+	Grid_dwf_cg_prec\
+	Grid_dwf_cg_schur
+
+Grid_comms_SOURCES = Grid_comms.cc
+Grid_comms_LDADD = -lGrid
+
+Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
+Grid_su3_LDADD = -lGrid
+
+Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
+Grid_memory_bandwidth_LDADD = -lGrid
+
 
 Grid_wilson_SOURCES = Grid_wilson.cc
 Grid_wilson_LDADD = -lGrid
 
-Grid_dwf_SOURCES = Grid_dwf.cc
-Grid_dwf_LDADD = -lGrid
-
 Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
 Grid_wilson_evenodd_LDADD = -lGrid
 
@@ -25,12 +45,18 @@ Grid_wilson_cg_prec_LDADD = -lGrid
 Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc
 Grid_wilson_cg_schur_LDADD = -lGrid
 
-Grid_comms_SOURCES = Grid_comms.cc
-Grid_comms_LDADD = -lGrid
+Grid_dwf_SOURCES = Grid_dwf.cc
+Grid_dwf_LDADD = -lGrid
 
-Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
-Grid_su3_LDADD = -lGrid
+Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc
+Grid_dwf_even_odd_LDADD = -lGrid
 
-Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
-Grid_memory_bandwidth_LDADD = -lGrid
+Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc
+Grid_dwf_cg_unprec_LDADD = -lGrid
+
+Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc
+Grid_dwf_cg_prec_LDADD = -lGrid
+
+Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc
+Grid_dwf_cg_schur_LDADD = -lGrid
 
diff --git a/lib/Grid_init.cc b/lib/Grid_init.cc
index 580e602a..20371a4b 100644
--- a/lib/Grid_init.cc
+++ b/lib/Grid_init.cc
@@ -143,7 +143,7 @@ void Grid_init(int *argc,char ***argv)
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
     WilsonFermion::HandOptDslash=1;
-    FiveDimWilsonFermion::HandOptDslash=1;
+    WilsonFermion5D::HandOptDslash=1;
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
     LebesgueOrder::UseLebesgueOrder=1;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index e983b432..b688d8b2 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -20,13 +20,17 @@ libGrid_a_SOURCES =				\
 	stencil/Grid_stencil_common.cc		\
 	algorithms/approx/Zolotarev.cc		\
 	algorithms/approx/Remez.cc		\
-	qcd/action/fermion/FiveDimWilsonFermion.cc\
-	qcd/action/fermion/WilsonFermion.cc\
+	qcd/SpaceTimeGrid.cc\
+	qcd/Dirac.cc\
 	qcd/action/fermion/WilsonKernels.cc\
 	qcd/action/fermion/WilsonKernelsHand.cc\
-	qcd/Dirac.cc\
+	qcd/action/fermion/WilsonFermion.cc\
+	qcd/action/fermion/WilsonFermion5D.cc\
+	qcd/action/fermion/CayleyFermion5D.cc \
+	qcd/action/fermion/ContinuedFractionFermion5D.cc	\
 	$(extra_sources)
 
+#	qcd/action/fermion/PartialFractionFermion5D.cc	\
 #
 # Include files
 #
@@ -95,11 +99,11 @@ nobase_include_HEADERS=\
 		./math/Grid_math_transpose.h\
 		./parallelIO/GridNerscIO.h\
 		./qcd/action/Actions.h\
-		./qcd/action/fermion/FermionAction.h\
-		./qcd/action/fermion/FiveDimWilsonFermion.h\
+		./qcd/action/fermion/FermionOperator.h\
 		./qcd/action/fermion/WilsonCompressor.h\
-		./qcd/action/fermion/WilsonFermion.h\
 		./qcd/action/fermion/WilsonKernels.h\
+		./qcd/action/fermion/WilsonFermion.h\
+		./qcd/action/fermion/WilsonFermion5D.h\
 		./qcd/Dirac.h\
 		./qcd/QCD.h\
 		./qcd/TwoSpinor.h\
diff --git a/lib/algorithms/LinearOperator.h b/lib/algorithms/LinearOperator.h
index cb6240ba..5839f2c1 100644
--- a/lib/algorithms/LinearOperator.h
+++ b/lib/algorithms/LinearOperator.h
@@ -125,39 +125,7 @@ namespace Grid {
      };
     */
 
-    // Chroma interface defining GaugeAction
-    /*
-      template<typename P, typename Q>   class GaugeAction
-  virtual const CreateGaugeState<P,Q>& getCreateState() const = 0;
-  virtual GaugeState<P,Q>* createState(const Q& q) const
-  virtual const GaugeBC<P,Q>& getGaugeBC() const
-  virtual const Set& getSet(void) const = 0;
-  virtual void deriv(P& result, const Handle< GaugeState<P,Q> >& state) const 
-  virtual Double S(const Handle< GaugeState<P,Q> >& state) const = 0;
 
-  class LinearGaugeAction : public GaugeAction< multi1d<LatticeColorMatrix>, multi1d<LatticeColorMatrix> >
-  typedef multi1d<LatticeColorMatrix>  P;
-  typedef multi1d<LatticeColorMatrix>  Q;
-  virtual void staple(LatticeColorMatrix& result,
-		      const Handle< GaugeState<P,Q> >& state,
-		      int mu, int cb) const = 0;
-    */
-
-    // Chroma interface defining FermionAction
-    /*
-     template<typename T, typename P, typename Q>  class FermAct4D : public FermionAction<T,P,Q>
-     virtual LinearOperator<T>* linOp(Handle< FermState<T,P,Q> > state) const = 0;
-     virtual LinearOperator<T>* lMdagM(Handle< FermState<T,P,Q> > state) const = 0;
-     virtual LinOpSystemSolver<T>* invLinOp(Handle< FermState<T,P,Q> > state,
-     virtual MdagMSystemSolver<T>* invMdagM(Handle< FermState<T,P,Q> > state,
-     virtual LinOpMultiSystemSolver<T>* mInvLinOp(Handle< FermState<T,P,Q> > state,
-     virtual MdagMMultiSystemSolver<T>* mInvMdagM(Handle< FermState<T,P,Q> > state,
-     virtual MdagMMultiSystemSolverAccumulate<T>* mInvMdagMAcc(Handle< FermState<T,P,Q> > state,
-     virtual SystemSolver<T>* qprop(Handle< FermState<T,P,Q> > state,
-     class DiffFermAct4D : public FermAct4D<T,P,Q>
-     virtual DiffLinearOperator<T,Q,P>* linOp(Handle< FermState<T,P,Q> > state) const = 0;
-     virtual DiffLinearOperator<T,Q,P>* lMdagM(Handle< FermState<T,P,Q> > state) const = 0;
-    */
 }
 
 #endif
diff --git a/lib/algorithms/approx/Zolotarev.cc b/lib/algorithms/approx/Zolotarev.cc
index 7629bbde..c73a3436 100644
--- a/lib/algorithms/approx/Zolotarev.cc
+++ b/lib/algorithms/approx/Zolotarev.cc
@@ -58,6 +58,8 @@
 
 /* Compute the partial fraction expansion coefficients (alpha) from the
  * factored form */
+namespace Grid {
+namespace Approx {
 
 static void construct_partfrac(izd *z) {
   int dn = z -> dn, dd = z -> dd, type = z -> type;
@@ -291,7 +293,7 @@ static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k,
  * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
  * type = 1 for the approximation which is infinite at x = 0. */
 
-zolotarev_data* bfm_zolotarev(PRECISION epsilon, int n, int type) {
+zolotarev_data* grid_zolotarev(PRECISION epsilon, int n, int type) {
   INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
     l, invlambda, xi, xisq, *tv, s, opl;
   int m, czero, ts;
@@ -412,7 +414,7 @@ zolotarev_data* bfm_zolotarev(PRECISION epsilon, int n, int type) {
   return zd;
 }
 
-zolotarev_data* bfm_higham(PRECISION epsilon, int n) {
+zolotarev_data* grid_higham(PRECISION epsilon, int n) {
   INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
   int m, czero;
   zolotarev_data *zd;
@@ -502,6 +504,7 @@ zolotarev_data* bfm_higham(PRECISION epsilon, int n) {
   free(d);
   return zd;
 }
+}}
 
 #ifdef TEST
 
@@ -707,4 +710,6 @@ int main(int argc, char** argv) {
 
   return EXIT_SUCCESS;
 }
+
+
 #endif /* TEST */
diff --git a/lib/algorithms/approx/Zolotarev.h b/lib/algorithms/approx/Zolotarev.h
index 3f0dc58e..869e5a89 100644
--- a/lib/algorithms/approx/Zolotarev.h
+++ b/lib/algorithms/approx/Zolotarev.h
@@ -1,7 +1,8 @@
 /* -*- Mode: C; comment-column: 22; fill-column: 79; -*- */
 
 #ifdef __cplusplus
-extern "C" {
+namespace Grid {
+namespace Approx {
 #endif
 
 #define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY>
@@ -76,10 +77,10 @@ typedef struct {
  * zolotarev_data structure. The arguments must satisfy the constraints that
  * epsilon > 0, n > 0, and type = 0 or 1. */
 
-ZOLOTAREV_DATA* bfm_higham(PRECISION epsilon, int n) ;
-ZOLOTAREV_DATA* bfm_zolotarev(PRECISION epsilon, int n, int type);
+ZOLOTAREV_DATA* grid_higham(PRECISION epsilon, int n) ;
+ZOLOTAREV_DATA* grid_zolotarev(PRECISION epsilon, int n, int type);
 #endif
 
 #ifdef __cplusplus
-}
+}}
 #endif
diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Grid_cartesian_base.h
index e93125c1..66339648 100644
--- a/lib/cartesian/Grid_cartesian_base.h
+++ b/lib/cartesian/Grid_cartesian_base.h
@@ -21,7 +21,7 @@ public:
     // Give Lattice access
     template<class object> friend class Lattice;
 
-    GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
+    GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
 
 
     // Physics Grid information.
diff --git a/lib/cartesian/Grid_cartesian_full.h b/lib/cartesian/Grid_cartesian_full.h
index 330bbfaf..2a9e0be8 100644
--- a/lib/cartesian/Grid_cartesian_full.h
+++ b/lib/cartesian/Grid_cartesian_full.h
@@ -27,9 +27,9 @@ public:
     virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){
       return shift;
     }
-    GridCartesian(std::vector<int> &dimensions,
-		  std::vector<int> &simd_layout,
-		  std::vector<int> &processor_grid
+    GridCartesian(const std::vector<int> &dimensions,
+		  const std::vector<int> &simd_layout,
+		  const std::vector<int> &processor_grid
 		  ) : GridBase(processor_grid)
     {
         ///////////////////////
diff --git a/lib/cartesian/Grid_cartesian_red_black.h b/lib/cartesian/Grid_cartesian_red_black.h
index ace36edb..3a84ed49 100644
--- a/lib/cartesian/Grid_cartesian_red_black.h
+++ b/lib/cartesian/Grid_cartesian_red_black.h
@@ -81,28 +81,28 @@ public:
       }
     };
 
-    GridRedBlackCartesian(GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors)  {};
+    GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors)  {};
 
-    GridRedBlackCartesian(std::vector<int> &dimensions,
-			  std::vector<int> &simd_layout,
-			  std::vector<int> &processor_grid,
-			  std::vector<int> &checker_dim_mask,
+    GridRedBlackCartesian(const std::vector<int> &dimensions,
+			  const std::vector<int> &simd_layout,
+			  const std::vector<int> &processor_grid,
+			  const std::vector<int> &checker_dim_mask,
 			  int checker_dim
 			  ) :  GridBase(processor_grid) 
     {
       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim);
     }
-    GridRedBlackCartesian(std::vector<int> &dimensions,
-			  std::vector<int> &simd_layout,
-			  std::vector<int> &processor_grid) : GridBase(processor_grid) 
+    GridRedBlackCartesian(const std::vector<int> &dimensions,
+			  const std::vector<int> &simd_layout,
+			  const std::vector<int> &processor_grid) : GridBase(processor_grid) 
     {
       std::vector<int> checker_dim_mask(dimensions.size(),1);
       Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0);
     }
-    void Init(std::vector<int> &dimensions,
-	      std::vector<int> &simd_layout,
-	      std::vector<int> &processor_grid,
-	      std::vector<int> &checker_dim_mask,
+    void Init(const std::vector<int> &dimensions,
+	      const std::vector<int> &simd_layout,
+	      const std::vector<int> &processor_grid,
+	      const std::vector<int> &checker_dim_mask,
 	      int checker_dim)
     {
     ///////////////////////
diff --git a/lib/communicator/Grid_communicator_base.h b/lib/communicator/Grid_communicator_base.h
index 47c1f525..61e19993 100644
--- a/lib/communicator/Grid_communicator_base.h
+++ b/lib/communicator/Grid_communicator_base.h
@@ -27,7 +27,7 @@ class CartesianCommunicator {
 #endif
 
     // Constructor
-    CartesianCommunicator(std::vector<int> &pdimensions_in);
+    CartesianCommunicator(const std::vector<int> &pdimensions_in);
 
     // Wraps MPI_Cart routines
     void ShiftedRanks(int dim,int shift,int & source, int & dest);
diff --git a/lib/communicator/Grid_communicator_mpi.cc b/lib/communicator/Grid_communicator_mpi.cc
index 6ef05c3d..5dd34705 100644
--- a/lib/communicator/Grid_communicator_mpi.cc
+++ b/lib/communicator/Grid_communicator_mpi.cc
@@ -5,7 +5,7 @@ namespace Grid {
 
   // Should error check all MPI calls.
 
-CartesianCommunicator::CartesianCommunicator(std::vector<int> &processors)
+CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   _ndimension = processors.size();
   std::vector<int> periodic(_ndimension,1);
diff --git a/lib/communicator/Grid_communicator_none.cc b/lib/communicator/Grid_communicator_none.cc
index 90eb26cc..d7eb9453 100644
--- a/lib/communicator/Grid_communicator_none.cc
+++ b/lib/communicator/Grid_communicator_none.cc
@@ -1,7 +1,7 @@
 #include "Grid.h"
 namespace Grid {
 
-CartesianCommunicator::CartesianCommunicator(std::vector<int> &processors)
+CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   _processors = processors;
   _ndimension = processors.size();
diff --git a/lib/qcd/LinalgUtils.h b/lib/qcd/LinalgUtils.h
new file mode 100644
index 00000000..2b83d115
--- /dev/null
+++ b/lib/qcd/LinalgUtils.h
@@ -0,0 +1,113 @@
+#ifndef GRID_QCD_LINALG_UTILS_H
+#define GRID_QCD_LINALG_UTILS_H
+
+namespace Grid{
+namespace QCD{
+////////////////////////////////////////////////////////////////////////
+//This file brings additional linear combination assist that is helpful
+//to QCD such as chiral projectors and spin matrices applied to one of the inputs.
+//These routines support five-D chiral fermions and contain s-subslice indexing 
+//on the 5d (rb4d) checkerboarded lattices
+////////////////////////////////////////////////////////////////////////
+template<class vobj> 
+void axpby_ssp(Lattice<vobj> &z, RealD a,const Lattice<vobj> &x,RealD b,const Lattice<vobj> &y,int s,int sp)
+{
+  z.checkerboard = x.checkerboard;
+  conformable(x,y);
+  conformable(x,z);
+  GridBase *grid=x._grid;
+  int Ls = grid->_rdimensions[0];
+PARALLEL_FOR_LOOP
+  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
+    vobj tmp = a*x._odata[ss+s]+b*y._odata[ss+sp];
+    vstream(z._odata[ss+s],tmp);
+  }
+}
+
+template<class vobj> 
+void ag5xpby_ssp(Lattice<vobj> &z,RealD a,const Lattice<vobj> &x,RealD b,const Lattice<vobj> &y,int s,int sp)
+{
+  z.checkerboard = x.checkerboard;
+  conformable(x,y);
+  conformable(x,z);
+  GridBase *grid=x._grid;
+  int Ls = grid->_rdimensions[0];
+PARALLEL_FOR_LOOP
+  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
+    vobj tmp;
+    multGamma5(tmp(),a*x._odata[ss+s]());
+    tmp = tmp + b*y._odata[ss+sp];
+    vstream(z._odata[ss+s],tmp);
+  }
+}
+
+template<class vobj> 
+void axpbg5y_ssp(Lattice<vobj> &z,RealD a,const Lattice<vobj> &x,RealD b,const Lattice<vobj> &y,int s,int sp)
+{
+  z.checkerboard = x.checkerboard;
+  conformable(x,y);
+  conformable(x,z);
+  GridBase *grid=x._grid;
+  int Ls = grid->_rdimensions[0];
+PARALLEL_FOR_LOOP
+  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
+    vobj tmp;
+    multGamma5(tmp(),b*y._odata[ss+sp]());
+    tmp = tmp + a*x._odata[ss+s];
+    vstream(z._odata[ss+s],tmp);
+  }
+}
+
+template<class vobj> 
+void ag5xpbg5y_ssp(Lattice<vobj> &z,RealD a,const Lattice<vobj> &x,RealD b,const Lattice<vobj> &y,int s,int sp)
+{
+  z.checkerboard = x.checkerboard;
+  conformable(x,y);
+  conformable(x,z);
+  GridBase *grid=x._grid;
+  int Ls = grid->_rdimensions[0];
+PARALLEL_FOR_LOOP
+  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
+    vobj tmp1;
+    vobj tmp2;
+    tmp1 = a*x._odata[ss+s]+b*y._odata[ss+sp];
+    multGamma5(tmp2(),tmp1());
+    vstream(z._odata[ss+s],tmp2);
+  }
+}
+
+template<class vobj> 
+void axpby_ssp_pminus(Lattice<vobj> &z,RealD a,const Lattice<vobj> &x,RealD b,const Lattice<vobj> &y,int s,int sp)
+{
+  z.checkerboard = x.checkerboard;
+  conformable(x,y);
+  conformable(x,z);
+  GridBase *grid=x._grid;
+  int Ls = grid->_rdimensions[0];
+PARALLEL_FOR_LOOP
+  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
+    vobj tmp;
+    spProj5m(tmp,y._odata[ss+sp]);
+    tmp = a*x._odata[ss+s]+b*tmp;
+    vstream(z._odata[ss+s],tmp);
+  }
+}
+
+template<class vobj> 
+void axpby_ssp_pplus(Lattice<vobj> &z,RealD a,const Lattice<vobj> &x,RealD b,const Lattice<vobj> &y,int s,int sp)
+{
+  z.checkerboard = x.checkerboard;
+  conformable(x,y);
+  conformable(x,z);
+  GridBase *grid=x._grid;
+  int Ls = grid->_rdimensions[0];
+PARALLEL_FOR_LOOP
+  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
+    vobj tmp;
+    spProj5p(tmp,y._odata[ss+sp]);
+    tmp = a*x._odata[ss+s]+b*tmp;
+    vstream(z._odata[ss+s],tmp);
+  }
+}
+}}
+#endif 
diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h
index 7c45eb23..4b5edc5c 100644
--- a/lib/qcd/QCD.h
+++ b/lib/qcd/QCD.h
@@ -307,8 +307,10 @@ namespace QCD {
 }   //namespace QCD
 } // Grid
 
+#include <qcd/SpaceTimeGrid.h>
 #include <qcd/Dirac.h>
 #include <qcd/TwoSpinor.h>
+#include <qcd/LinalgUtils.h>
 #include <qcd/action/Actions.h>
 
 #endif
diff --git a/lib/qcd/SpaceTimeGrid.cc b/lib/qcd/SpaceTimeGrid.cc
new file mode 100644
index 00000000..284c5771
--- /dev/null
+++ b/lib/qcd/SpaceTimeGrid.cc
@@ -0,0 +1,52 @@
+#include <Grid.h>
+
+namespace Grid { 
+  namespace QCD {
+
+/////////////////////////////////////////////////////////////////
+// Public interface
+/////////////////////////////////////////////////////////////////
+GridCartesian *SpaceTimeGrid::makeFourDimGrid(const std::vector<int> & latt,const std::vector<int> &simd,const std::vector<int> &mpi)
+{ // Forwards the lattice, SIMD and MPI layouts to a GridCartesian created with new; nothing here deletes it.
+  return new GridCartesian(latt,simd,mpi); 
+}
+GridRedBlackCartesian *SpaceTimeGrid::makeFourDimRedBlackGrid(const GridCartesian *FourDimGrid)
+{ // Builds the red-black grid from the given full grid; the result is created with new and not deleted here.
+  return new GridRedBlackCartesian(FourDimGrid); 
+}
+
+GridCartesian         *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian *FourDimGrid)
+{
+  // Dimension 0 is the added direction: extent Ls with unit SIMD and MPI layout.
+  std::vector<int> latt5(1,Ls);
+  std::vector<int> simd5(1,1);
+  std::vector<int>  mpi5(1,1);
+  // The remaining dimensions are copied, in order, from the four dimensional grid.
+  const int N4=FourDimGrid->_ndimension;
+  for(int mu=0;mu<N4;mu++){
+    latt5.push_back(FourDimGrid->_fdimensions[mu]);
+    simd5.push_back(FourDimGrid->_simd_layout[mu]);
+     mpi5.push_back(FourDimGrid->_processors[mu]);
+  }
+  return new GridCartesian(latt5,simd5,mpi5); 
+}
+
+GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridCartesian *FourDimGrid)
+{
+  // Dimension 0 is the added direction: extent Ls, unit SIMD and MPI layout, cb5 entry 0.
+  std::vector<int> latt5(1,Ls);
+  std::vector<int> simd5(1,1);
+  std::vector<int>  mpi5(1,1);
+  std::vector<int>   cb5(1,0);
+  const int N4=FourDimGrid->_ndimension;
+  for(int mu=0;mu<N4;mu++){ // copy the four dimensional layout; these directions get cb5 entry 1
+    latt5.push_back(FourDimGrid->_fdimensions[mu]);
+    simd5.push_back(FourDimGrid->_simd_layout[mu]);
+     mpi5.push_back(FourDimGrid->_processors[mu]);
+      cb5.push_back(1);
+  }
+  int cbd=1;
+  return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd); 
+}
+
+}}
diff --git a/lib/qcd/SpaceTimeGrid.h b/lib/qcd/SpaceTimeGrid.h
new file mode 100644
index 00000000..0b386a0e
--- /dev/null
+++ b/lib/qcd/SpaceTimeGrid.h
@@ -0,0 +1,18 @@
+#ifndef GRID_QCD_SPACE_TIME_GRID_H
+#define GRID_QCD_SPACE_TIME_GRID_H
+namespace Grid {
+namespace QCD {
+
+class SpaceTimeGrid {
+ public:
+  // Static factory helpers for four and five dimensional (full and red-black) grids.
+  static GridCartesian         *makeFourDimGrid(const std::vector<int> & latt,const std::vector<int> &simd,const std::vector<int> &mpi);
+  static GridRedBlackCartesian *makeFourDimRedBlackGrid       (const GridCartesian *FourDimGrid);
+  static GridCartesian         *makeFiveDimGrid        (int Ls,const GridCartesian *FourDimGrid);
+  static GridRedBlackCartesian *makeFiveDimRedBlackGrid(int Ls,const GridCartesian *FourDimGrid);
+  // NOTE(review): ownership of the returned raw pointers is not stated here; presumably the caller's, confirm.
+};
+
+}}
+
+#endif
diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index c4e8a2f0..acbf027c 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -1,10 +1,80 @@
 #ifndef GRID_QCD_ACTIONS_H
 #define GRID_QCD_ACTIONS_H
 
-#include <qcd/action/fermion/FermionAction.h>
-#include <qcd/action/fermion/WilsonCompressor.h>
-#include <qcd/action/fermion/WilsonKernels.h>
+
+// Some reorganisation likely required as both Chroma and IroIro
+// are separating the concept of the operator from that of action.
+//
+// The FermAction contains methods to create 
+//
+// * Linear operators             (Hermitian and non-hermitian)  .. my LinearOperator
+// * System solvers               (Hermitian and non-hermitian)  .. my OperatorFunction
+// * MultiShift System solvers    (Hermitian and non-hermitian)  .. my OperatorFunction
+
+
+////////////////////////////////////////////
+// Abstract base interface
+////////////////////////////////////////////
+#include <qcd/action/fermion/FermionOperator.h>
+
+////////////////////////////////////////////
+// Utility functions
+////////////////////////////////////////////
+#include <qcd/action/fermion/WilsonCompressor.h>     //used by all wilson type fermions
+#include <qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions
+
+////////////////////////////////////////////
+// 4D formulations
+////////////////////////////////////////////
 #include <qcd/action/fermion/WilsonFermion.h>
-#include <qcd/action/fermion/FiveDimWilsonFermion.h>
+//#include <qcd/action/fermion/CloverFermion.h>
+
+////////////////////////////////////////////
+// 5D formulations
+////////////////////////////////////////////
+#include <qcd/action/fermion/WilsonFermion5D.h> // used by all 5d overlap types
+#include <qcd/action/fermion/CayleyFermion5D.h>
+#include <qcd/action/fermion/ContinuedFractionFermion5D.h>
+//#include <qcd/action/fermion/PartialFraction.h>
+
+#include <qcd/action/fermion/DomainWallFermion.h>
+//#include <qcd/action/fermion/ScaledShamirCayleyTanh.h>
+
+
+    // Chroma interface defining FermionAction
+    /*
+     template<typename T, typename P, typename Q>  class FermAct4D : public FermionAction<T,P,Q>
+     virtual LinearOperator<T>* linOp(Handle< FermState<T,P,Q> > state) const = 0;
+     virtual LinearOperator<T>* lMdagM(Handle< FermState<T,P,Q> > state) const = 0;
+     virtual LinOpSystemSolver<T>* invLinOp(Handle< FermState<T,P,Q> > state,
+     virtual MdagMSystemSolver<T>* invMdagM(Handle< FermState<T,P,Q> > state,
+     virtual LinOpMultiSystemSolver<T>* mInvLinOp(Handle< FermState<T,P,Q> > state,
+     virtual MdagMMultiSystemSolver<T>* mInvMdagM(Handle< FermState<T,P,Q> > state,
+     virtual MdagMMultiSystemSolverAccumulate<T>* mInvMdagMAcc(Handle< FermState<T,P,Q> > state,
+     virtual SystemSolver<T>* qprop(Handle< FermState<T,P,Q> > state,
+     class DiffFermAct4D : public FermAct4D<T,P,Q>
+     virtual DiffLinearOperator<T,Q,P>* linOp(Handle< FermState<T,P,Q> > state) const = 0;
+     virtual DiffLinearOperator<T,Q,P>* lMdagM(Handle< FermState<T,P,Q> > state) const = 0;
+    */
+
+
+    // Chroma interface defining GaugeAction
+    /*
+      template<typename P, typename Q>   class GaugeAction
+  virtual const CreateGaugeState<P,Q>& getCreateState() const = 0;
+  virtual GaugeState<P,Q>* createState(const Q& q) const
+  virtual const GaugeBC<P,Q>& getGaugeBC() const
+  virtual const Set& getSet(void) const = 0;
+  virtual void deriv(P& result, const Handle< GaugeState<P,Q> >& state) const 
+  virtual Double S(const Handle< GaugeState<P,Q> >& state) const = 0;
+
+  class LinearGaugeAction : public GaugeAction< multi1d<LatticeColorMatrix>, multi1d<LatticeColorMatrix> >
+  typedef multi1d<LatticeColorMatrix>  P;
+  typedef multi1d<LatticeColorMatrix>  Q;
+  virtual void staple(LatticeColorMatrix& result,
+		      const Handle< GaugeState<P,Q> >& state,
+		      int mu, int cb) const = 0;
+    */
+
 
 #endif
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc
new file mode 100644
index 00000000..263cc28b
--- /dev/null
+++ b/lib/qcd/action/fermion/CayleyFermion5D.cc
@@ -0,0 +1,235 @@
+#include <Grid.h>
+namespace Grid {
+namespace QCD {
+
+ CayleyFermion5D::CayleyFermion5D(LatticeGaugeField &_Umu,
+				  GridCartesian         &FiveDimGrid,
+				  GridRedBlackCartesian &FiveDimRedBlackGrid,
+				  GridCartesian         &FourDimGrid,
+				  GridRedBlackCartesian &FourDimRedBlackGrid,
+				  RealD _mass,RealD _M5) :
+   WilsonFermion5D(_Umu,
+		   FiveDimGrid,
+		   FiveDimRedBlackGrid,
+		   FourDimGrid,
+		   FourDimRedBlackGrid,_M5), // the grids and M5 are handed to the WilsonFermion5D base
+   mass(_mass) // only mass is stored here; the coefficient vectors stay empty and zdata is not set
+ {
+   std::cout << "Constructing a CayleyFermion5D"<<std::endl; // trace print on every construction
+ }
+
+  // override multiply: chi = M psi (no dagger); returns norm2(chi). The s==0 branch reads slice s+1.
+  RealD CayleyFermion5D::M    (const LatticeFermion &psi, LatticeFermion &chi)
+  {
+    LatticeFermion Din(psi._grid);
+    // Below, P_-/P_+ denote spProj5m/spProj5p as applied by axpby_ssp_pminus/axpby_ssp_pplus to the slice read.
+    // Assemble Din
+    for(int s=0;s<Ls;s++){
+      if ( s==0 ) {
+	//	Din[s] = bs[s] psi[s] + cs[s] P_- psi[s+1]
+	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
+	//      Din[s]+= -mass*cs[s] P_+ psi[Ls-1]   (wrap-around term carries the mass)
+	axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
+      } else if ( s==(Ls-1)) { // last slice: the P_- term wraps around to slice 0 with -mass
+	axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
+	axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
+      } else {
+	axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
+	axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
+      }
+    }
+    // Apply DW to the assembled field
+    DW(Din,chi,DaggerNo);
+    // ((b D_W + D_w hop terms +1) on s-diag
+    axpby(chi,1.0,1.0,chi,psi); 
+    // Terms without DW: chi[s] += -P_- psi[s+1] - P_+ psi[s-1], with +mass on the two wrap-around links
+    for(int s=0;s<Ls;s++){
+      if ( s==0 ){
+	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
+	axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
+      } else if ( s==(Ls-1)) {
+	axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
+	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
+      } else {
+	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
+	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
+      }
+    }
+    return norm2(chi);
+  }
+
+  RealD CayleyFermion5D::Mdag (const LatticeFermion &psi, LatticeFermion &chi)
+  {
+    // Under adjoint
+    //D1+        D1- P-    ->   D1+^dag   P+ D2-^dag
+    //D2- P+     D2+            P-D1-^dag D2+dag
+    // So relative to M the projectors swap and each neighbour's own cs[] coefficient is used.
+    LatticeFermion Din(psi._grid);
+    // Apply Dw
+    DW(psi,Din,DaggerYes); 
+
+    for(int s=0;s<Ls;s++){
+      // Collect the terms in DW
+      //	chi[s] = bs[s] Din[s] + cs[s+1] P_+ Din[s+1]
+      //    chi[s]+= cs[s-1] P_- Din[s-1]   (wrap-around links use -mass*cs[Ls-1] and -mass*cs[0])
+      if ( s==0 ) {
+	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
+	axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
+      } else if ( s==(Ls-1)) { 
+	axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0);
+	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
+      } else {
+	axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
+	axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
+      }
+      // Collect the terms indept of DW
+      if ( s==0 ){
+	axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
+	axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
+      } else if ( s==(Ls-1)) {
+	axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
+	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
+      } else {
+	axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
+	axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
+      }
+    }
+    // ((b D_W + D_w hop terms +1) on s-diag
+    axpby (chi,1.0,1.0,chi,psi); 
+    return norm2(chi); // same return convention as M
+  }
+
+  // half checkerboard operations
+  void CayleyFermion5D::Meooe       (const LatticeFermion &psi, LatticeFermion &chi)
+  {
+    LatticeFermion tmp(psi._grid);
+    // Assemble the 5d matrix
+    for(int s=0;s<Ls;s++){
+      if ( s==0 ) {
+	//	tmp[s] = beo[s] psi[s] - ceo[s] P_- psi[s+1]
+	//      tmp[s]+= mass*ceo[s] P_+ psi[Ls-1]   (wrap-around term carries the mass)
+	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
+	axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
+      } else if ( s==(Ls-1)) { 
+	axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
+	axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
+      } else {
+	axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
+	axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
+      }
+    }
+    // Apply 4d dslash
+    if ( psi.checkerboard == Odd ) { // choose the hopping term from the checkerboard of the input
+      DhopEO(tmp,chi,DaggerNo);
+    } else {
+      DhopOE(tmp,chi,DaggerNo);
+    }
+  }
+
+  void CayleyFermion5D::MeooeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+  { // Reverse order of Meooe: daggered dslash first, then the s-mixing with projectors swapped.
+    LatticeFermion tmp(psi._grid);
+    // Apply 4d dslash
+    if ( psi.checkerboard == Odd ) {
+      DhopEO(psi,tmp,DaggerYes);
+    } else {
+      DhopOE(psi,tmp,DaggerYes);
+    }
+    // Assemble the 5d matrix
+    for(int s=0;s<Ls;s++){ // chi[s] = beo[s] tmp[s] - ceo[s+1] P_+ tmp[s+1] - ceo[s-1] P_- tmp[s-1]; wrap-around links use +mass*ceo
+      if ( s==0 ) {
+	axpby_ssp_pplus(chi,beo[s],tmp,   -ceo[s+1]  ,tmp,s,s+1);
+	axpby_ssp_pminus(chi,   1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1);
+      } else if ( s==(Ls-1)) { 
+	axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0);
+	axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1);
+      } else {
+	axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1);
+	axpby_ssp_pminus(chi,1.0   ,chi,-ceo[s-1],tmp,s,s-1);
+      }
+    }
+  }
+
+  void CayleyFermion5D::Mooee       (const LatticeFermion &psi, LatticeFermion &chi)
+  { // No dslash here: chi[s] = bee[s] psi[s] - cee[s] P_- psi[s+1] - cee[s] P_+ psi[s-1]
+    for (int s=0;s<Ls;s++){
+      if ( s==0 ) {
+	axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
+	axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1); // wrap-around link: +mass*cee[s]
+      } else if ( s==(Ls-1)) { 
+	axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0); // wrap-around link: +mass*cee[s]
+	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
+      } else {
+	axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1);
+	axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
+      }
+    }
+  }
+
+  void CayleyFermion5D::MooeeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+  { // Same structure as Mooee with projectors swapped and the neighbour's cee[] coefficient used.
+    for (int s=0;s<Ls;s++){
+      // Assemble the 5d matrix
+      if ( s==0 ) {
+	axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1]  ,psi,s,s+1);
+	axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1); // wrap-around link
+      } else if ( s==(Ls-1)) { 
+	axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0); // wrap-around link
+	axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1);
+      } else {
+	axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1);
+	axpby_ssp_pminus(chi,1.0   ,chi,-cee[s-1],psi,s,s-1);
+      }
+    }
+  }
+
+  void CayleyFermion5D::MooeeInv    (const LatticeFermion &psi, LatticeFermion &chi)
+  { // Applies the lee/leem/dee/ueem/uee factors in sequence; chi is updated in place, so statement order matters.
+    // Apply (L^{\prime})^{-1}
+    axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
+    for (int s=1;s<Ls;s++){
+      axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
+    }
+    // L_m^{-1} 
+    for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
+      axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
+    }
+    // U_m^{-1} D^{-1}
+    for (int s=0;s<Ls-1;s++){
+      // Chi[s] + 1/d chi[s] 
+      axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
+    }	
+    axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable 
+    
+    // Apply U^{-1}
+    for (int s=Ls-2;s>=0;s--){ // runs downwards so each step reads the already-updated slice s+1
+      axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1);  // chi[Ls]
+    }
+  }
+
+  void CayleyFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
+  { // Mirror of MooeeInv: uee/ueem and lee/leem trade places and the projectors are swapped.
+    // Apply (U^{\prime})^{-dagger}
+    axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
+    for (int s=1;s<Ls;s++){
+      axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
+    }
+    // U_m^{-\dagger} 
+    for (int s=0;s<Ls-1;s++){
+      axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
+    }
+    // L_m^{-\dagger} D^{-dagger}
+    for (int s=0;s<Ls-1;s++){
+      axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
+    }	
+    axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable 
+    
+    // Apply L^{-dagger}
+    for (int s=Ls-2;s>=0;s--){ // runs downwards so each step reads the already-updated slice s+1
+      axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);  // chi[Ls]
+    }
+  }
+
+}
+}
+
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h
new file mode 100644
index 00000000..857ac124
--- /dev/null
+++ b/lib/qcd/action/fermion/CayleyFermion5D.h
@@ -0,0 +1,61 @@
+#ifndef  GRID_QCD_CAYLEY_FERMION_H
+#define  GRID_QCD_CAYLEY_FERMION_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    class CayleyFermion5D : public WilsonFermion5D
+    {
+    public:
+      // Operator built on WilsonFermion5D; the coefficient tables below are filled by a derived class (e.g. DomainWallFermion).
+      // override multiply
+      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+
+      // half checkerboard operations
+      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+
+      //    protected:
+      // (the access specifier above is commented out, so every member below is public)
+      Approx::zolotarev_data *zdata; // not set by this class's constructor
+
+      RealD mass;
+      // Cayley form Moebius (tanh and zolotarev)
+      std::vector<RealD> omega; 
+      std::vector<RealD> bs;    // S dependent coeffs
+      std::vector<RealD> cs;    
+      std::vector<RealD> as;    
+      // For preconditioning Cayley form
+      std::vector<RealD> bee;    // used by Mooee / MooeeDag
+      std::vector<RealD> cee;    // used by Mooee / MooeeDag
+      std::vector<RealD> aee;    
+      std::vector<RealD> beo;    // used by Meooe / MeooeDag
+      std::vector<RealD> ceo;    // used by Meooe / MeooeDag
+      std::vector<RealD> aeo;    
+      // LDU factorisation of the eeoo matrix
+      std::vector<RealD> lee;    // all five are read by MooeeInv / MooeeInvDag
+      std::vector<RealD> leem;    
+      std::vector<RealD> uee;    
+      std::vector<RealD> ueem;    
+      std::vector<RealD> dee;    
+
+      // Constructors
+      CayleyFermion5D(LatticeGaugeField &_Umu,
+		      GridCartesian         &FiveDimGrid,
+		      GridRedBlackCartesian &FiveDimRedBlackGrid,
+		      GridCartesian         &FourDimGrid,
+		      GridRedBlackCartesian &FourDimRedBlackGrid,
+		      RealD _mass,RealD _M5);
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
new file mode 100644
index 00000000..c281b486
--- /dev/null
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
@@ -0,0 +1,119 @@
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    RealD  ContinuedFractionFermion5D::M           (const LatticeFermion &psi, LatticeFermion &chi)
+    {
+      LatticeFermion D(psi._grid);
+      // D = DW psi; the loop then combines gamma5-multiplied D with neighbouring slices of psi. Returns norm2(chi).
+      DW(psi,D,DaggerNo); 
+      // presumably ag5xpby_ssp(z,a,x,b,y,s,sp) sets z[s] = a*g5*x[s] + b*y[sp] (defined outside this file) - confirm
+      int sign=1;
+      for(int s=0;s<Ls;s++){
+	if ( s==0 ) {
+	  ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*scale,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
+	} else if ( s==(Ls-1) ){
+	  RealD R=(1.0+mass)/(1.0-mass); // local value, shadows the member R; divides by zero when mass==1
+	  ag5xpby_ssp(chi,Beta[s]*scale,D,sqrt_cc[s-1],psi,s,s-1);
+	  ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
+	} else {
+	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*scale,D,sqrt_cc[s],psi,s,s+1);
+  	  axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
+	}
+	sign=-sign; // alternates +1,-1 with s
+      }
+      return norm2(chi);
+    }
+    RealD  ContinuedFractionFermion5D::Mdag        (const LatticeFermion &psi, LatticeFermion &chi)
+    {
+      // This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
+      // The rest of matrix is symmetric.
+      // Can ignore "dag"
+      return M(psi,chi); // so the adjoint simply forwards to M
+    }
+    void   ContinuedFractionFermion5D::Meooe       (const LatticeFermion &psi, LatticeFermion &chi)
+    {
+      // Dslash on diagonal. g5 Dslash is hermitian. psi is a single-checkerboard field, so pick the half-lattice hop.
+      if ( psi.checkerboard == Odd ) DhopEO(psi,chi,DaggerNo);
+      else                           DhopOE(psi,chi,DaggerNo);
+      int sign=1; // alternates +1,-1 with s
+      for(int s=0;s<Ls;s++){ // scale each slice in place and multiply it by gamma5
+	if ( s==(Ls-1) ){
+	  ag5xpby_ssp(chi,Beta[s]*scale,chi,0.0,chi,s,s);
+	} else {
+	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*scale,chi,0.0,chi,s,s);
+	}
+	sign=-sign; 
+      }
+    }
+    void   ContinuedFractionFermion5D::MeooeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+    { // Forwards to Meooe; Mdag's comment states the matrix is hermitian.
+      Meooe(psi,chi);
+    }
+    void   ContinuedFractionFermion5D::Mooee       (const LatticeFermion &psi, LatticeFermion &chi)
+    {
+      double dw_diag = (4.0-this->M5)*scale;
+      // Same slice pattern as M, with dw_diag*psi used where M uses scale*(DW psi).
+      int sign=1;
+      for(int s=0;s<Ls;s++){
+	if ( s==0 ) {
+	  ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*dw_diag,psi,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
+	} else if ( s==(Ls-1) ){
+	  // Drop the CC here.
+	  double R=(1+this->mass)/(1-this->mass); // local value, shadows the member R; divides by zero when mass==1
+	  ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1);
+	  ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
+	} else {
+	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*dw_diag,psi,sqrt_cc[s],psi,s,s+1);
+	  axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
+	}
+	sign=-sign; // alternates +1,-1 with s
+      }
+    }
+
+    void   ContinuedFractionFermion5D::MooeeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+    { // Forwards to Mooee; Mdag's comment states the matrix is hermitian.
+      Mooee(psi,chi);
+    }
+    void   ContinuedFractionFermion5D::MooeeInv    (const LatticeFermion &psi, LatticeFermion &chi)
+    { // Three sweeps over s (Linv, Dinv, Uinv) using cc_d and See; chi is updated in place, so order matters.
+      // Apply Linv
+      axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
+      for(int s=1;s<Ls;s++){ // forward recursion: slice s reads the already-updated slice s-1
+	axpbg5y_ssp(chi,1.0/cc_d[s],psi,-1.0/See[s-1],chi,s,s-1);
+      }
+      // Apply Dinv
+      for(int s=0;s<Ls;s++){
+	ag5xpby_ssp(chi,1.0/See[s],chi,0.0,chi,s,s); //only appearance of See[0]
+      }
+      // Apply Uinv = (Linv)^T
+      axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,this->Ls-1,this->Ls-1);
+      for(int s=Ls-2;s>=0;s--){ // backward recursion: slice s reads the already-updated slice s+1
+	axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
+      }
+    }
+    void   ContinuedFractionFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
+    { // Forwards to MooeeInv; Mdag's comment states the matrix is hermitian.
+      MooeeInv(psi,chi);
+    }
+    
+    // Constructors: passes the grids and M5 to WilsonFermion5D and stores the mass.
+    ContinuedFractionFermion5D::ContinuedFractionFermion5D(
+							   LatticeGaugeField &_Umu,
+							   GridCartesian         &FiveDimGrid,
+							   GridRedBlackCartesian &FiveDimRedBlackGrid,
+							   GridCartesian         &FourDimGrid,
+							   GridRedBlackCartesian &FourDimRedBlackGrid,
+							   RealD _mass,RealD M5) :
+      WilsonFermion5D(_Umu,
+		      FiveDimGrid, FiveDimRedBlackGrid,
+		      FourDimGrid, FourDimRedBlackGrid,M5),
+      mass(_mass)
+    { // NOTE(review): zdata, R, scale and the Beta/cc/cc_d/sqrt_cc/See/Aee vectors are not set here, yet M/Mooee/MooeeInv index them
+    }
+
+  }
+}
+
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
new file mode 100644
index 00000000..7f5c022a
--- /dev/null
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
@@ -0,0 +1,53 @@
+#ifndef  GRID_QCD_CONTINUED_FRACTION_H
+#define  GRID_QCD_CONTINUED_FRACTION_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    class ContinuedFractionFermion5D : public WilsonFermion5D
+    {
+    public:
+      // Operator built on WilsonFermion5D; its Mdag and *Dag methods forward to the undaggered ones.
+      // override multiply
+      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+
+      // half checkerboard operations
+      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+
+    private:
+
+      Approx::zolotarev_data *zdata; // not set by the constructor
+
+      // Cont frac
+      RealD mass;
+      RealD R;     // M and Mooee compute a local R from mass instead of reading this member
+      RealD scale;
+      std::vector<double> Beta;
+      std::vector<double> cc;
+      std::vector<double> cc_d;
+      std::vector<double> sqrt_cc;
+      std::vector<double> See;
+      std::vector<double> Aee;
+    public: // the constructor was private, which made the class impossible to instantiate
+      // Constructors
+      ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
+				 GridCartesian         &FiveDimGrid,
+				 GridRedBlackCartesian &FiveDimRedBlackGrid,
+				 GridCartesian         &FourDimGrid,
+				 GridRedBlackCartesian &FourDimRedBlackGrid,
+				 RealD _mass,RealD M5);
+
+    };
+
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h
new file mode 100644
index 00000000..2abb6eb2
--- /dev/null
+++ b/lib/qcd/action/fermion/DomainWallFermion.h
@@ -0,0 +1,118 @@
+#ifndef  GRID_QCD_DOMAIN_WALL_FERMION_H
+#define  GRID_QCD_DOMAIN_WALL_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class DomainWallFermion : public CayleyFermion5D
+    {
+    public:
+      // CayleyFermion5D whose coefficient tables are all filled by this constructor (as[i]=1, bb=1, omega from grid_higham).
+      // Constructors
+      DomainWallFermion(LatticeGaugeField &_Umu,
+			GridCartesian         &FiveDimGrid,
+			GridRedBlackCartesian &FiveDimRedBlackGrid,
+			GridCartesian         &FourDimGrid,
+			GridRedBlackCartesian &FourDimRedBlackGrid,
+			RealD _mass,RealD _M5) : 
+
+      CayleyFermion5D(_Umu,
+		      FiveDimGrid,
+		      FiveDimRedBlackGrid,
+		      FourDimGrid,
+		      FourDimRedBlackGrid,_mass,_M5)
+
+      {
+	RealD eps = 1.0;
+	// NOTE(review): zdata is never released in this class; check whether something else frees it.
+	zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
+	assert(zdata->n==this->Ls);
+ 
+	///////////////////////////////////////////////////////////
+	// The Cayley coeffs (unprec)
+	///////////////////////////////////////////////////////////
+	this->omega.resize(this->Ls);
+	this->bs.resize(this->Ls);
+	this->cs.resize(this->Ls);
+	this->as.resize(this->Ls);
+	
+	for(int i=0; i < this->Ls; i++){
+	  this->as[i] = 1.0;
+	  this->omega[i] = ((double)zdata -> gamma[i]);
+	  double bb=1.0;
+	  this->bs[i] = 0.5*(bb/(this->omega[i]) + 1.0); // with bb=1: (1/omega + 1)/2
+	  this->cs[i] = 0.5*(bb/(this->omega[i]) - 1.0); // with bb=1: (1/omega - 1)/2
+	}
+
+	////////////////////////////////////////////////////////
+	// Constants for the preconditioned matrix Cayley form
+	////////////////////////////////////////////////////////
+	this->bee.resize(this->Ls);
+	this->cee.resize(this->Ls);
+	this->beo.resize(this->Ls);
+	this->ceo.resize(this->Ls);
+
+	for(int i=0;i<this->Ls;i++){
+	  this->bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0);
+	  this->cee[i]=as[i]*(1.0-cs[i]*(4.0-M5));
+	  this->beo[i]=as[i]*bs[i];
+	  this->ceo[i]=-as[i]*cs[i];
+	}
+
+	aee.resize(this->Ls);
+	aeo.resize(this->Ls);
+	for(int i=0;i<this->Ls;i++){ // aee/aeo are plain copies of cee/ceo here
+	  aee[i]=cee[i];
+	  aeo[i]=ceo[i];
+	}
+
+	//////////////////////////////////////////
+	// LDU decomposition of eeoo
+	//////////////////////////////////////////
+	dee.resize(this->Ls);
+	lee.resize(this->Ls);
+	leem.resize(this->Ls);
+	uee.resize(this->Ls);
+	ueem.resize(this->Ls);
+
+	for(int i=0;i<this->Ls;i++){
+	  
+	  dee[i] = bee[i];
+	  
+	  if ( i < this->Ls-1 ) {
+
+	    lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
+	    
+	    leem[i]=this->mass*cee[this->Ls-1]/bee[0];
+	    for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1];
+	    
+	    uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row
+	    
+	    ueem[i]=this->mass;
+	    for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
+	    ueem[i]*= aee[0]/bee[0];
+	    
+	  } else { // last slice: no off-diagonal factors
+	    lee[i] =0.0;
+	    leem[i]=0.0;
+	    uee[i] =0.0;
+	    ueem[i]=0.0;
+	  }
+	}
+	// The last diagonal entry gets an extra mass-dependent term built from the product of cee[j]/bee[j].
+	{ 
+	  double delta_d=mass*cee[this->Ls-1];
+	  for(int j=0;j<this->Ls-1;j++) delta_d *= cee[j]/bee[j];
+	  dee[this->Ls-1] += delta_d;
+	}
+      }
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/FermionAction.h b/lib/qcd/action/fermion/FermionOperator.h
similarity index 92%
rename from lib/qcd/action/fermion/FermionAction.h
rename to lib/qcd/action/fermion/FermionOperator.h
index 1b05174b..47c47478 100644
--- a/lib/qcd/action/fermion/FermionAction.h
+++ b/lib/qcd/action/fermion/FermionOperator.h
@@ -1,5 +1,5 @@
-#ifndef  GRID_QCD_WILSON_DOP_H
-#define  GRID_QCD_WILSON_DOP_H
+#ifndef  GRID_QCD_FERMION_OPERATOR_H
+#define  GRID_QCD_FERMION_OPERATOR_H
 
 namespace Grid {
 
@@ -11,7 +11,7 @@ namespace Grid {
     // Think about multiple representations
     //////////////////////////////////////////////////////////////////////////////
     template<class FermionField,class GaugeField>
-    class FermionAction : public CheckerBoardedSparseMatrixBase<FermionField>
+    class FermionOperator : public CheckerBoardedSparseMatrixBase<FermionField>
     {
     public:
 
@@ -40,6 +40,7 @@ namespace Grid {
       virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0;
       virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0;
 
+
     };
 
   }
diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc
new file mode 100644
index 00000000..21f62a75
--- /dev/null
+++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc
@@ -0,0 +1,47 @@
+#ifndef  GRID_QCD_PARTIAL_FRACTION_H
+#define  GRID_QCD_PARTIAL_FRACTION_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    class PartialFractionFermion5D : public WilsonFermion5D
+    {
+    public:
+      // NOTE(review): this .cc holds a class declaration under the same include guard as PartialFractionFermion5D.h, with no member definitions.
+      // override multiply
+      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+
+      // half checkerboard operations
+      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+
+    private:
+      // NOTE(review): sibling classes spell this type Approx::zolotarev_data - confirm the unqualified name resolves.
+      zolotarev_data *zdata;
+
+      // Part frac
+      double R=(1+this->mass)/(1-this->mass); // NOTE(review): no 'mass' member is declared in this class - confirm the base provides one
+      std::vector<double> p; 
+      std::vector<double> q;
+      // NOTE(review): the constructor below is private, so outside code cannot construct this class.
+      // Constructors
+      PartialFractionFermion5D(LatticeGaugeField &_Umu,
+				    GridCartesian         &FiveDimGrid,
+				    GridRedBlackCartesian &FiveDimRedBlackGrid,
+				    GridCartesian         &FourDimGrid,
+				    GridRedBlackCartesian &FourDimRedBlackGrid,
+				    RealD _mass,RealD M5);
+
+    };
+
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h b/lib/qcd/action/fermion/PartialFractionFermion5D.h
new file mode 100644
index 00000000..c51aa563
--- /dev/null
+++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h
@@ -0,0 +1,49 @@
+#ifndef  GRID_QCD_PARTIAL_FRACTION_H
+#define  GRID_QCD_PARTIAL_FRACTION_H
+
+namespace Grid {
+
+  namespace QCD {
+
+    class PartialFractionFermion5D : public WilsonFermion5D
+    {
+    public:
+      // Declaration only: no member definitions for this class appear alongside it yet.
+      // override multiply
+      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+
+      // half checkerboard operations
+      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+
+    private:
+      virtual void PartialFractionCoefficients(void);
+
+      Approx::zolotarev_data *zdata; // qualified like the sibling fermion classes
+
+      // Part frac
+      RealD mass; // must stay declared before R: R's initializer reads it, and members initialise in declaration order
+      double R=(1+this->mass)/(1-this->mass);
+      std::vector<double> p; 
+      std::vector<double> q;
+    public: // the constructor was private, which made the class impossible to instantiate
+      // Constructors
+      PartialFractionFermion5D(LatticeGaugeField &_Umu,
+				    GridCartesian         &FiveDimGrid,
+				    GridRedBlackCartesian &FiveDimRedBlackGrid,
+				    GridCartesian         &FourDimGrid,
+				    GridRedBlackCartesian &FourDimRedBlackGrid,
+				    RealD _mass,RealD M5);
+
+    };
+
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc
index aa30a7fa..9f2da251 100644
--- a/lib/qcd/action/fermion/WilsonFermion.cc
+++ b/lib/qcd/action/fermion/WilsonFermion.cc
@@ -9,9 +9,9 @@ const std::vector<int> WilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1});
 int WilsonFermion::HandOptDslash;
 
 WilsonFermion::WilsonFermion(LatticeGaugeField &_Umu,
-			   GridCartesian         &Fgrid,
-			   GridRedBlackCartesian &Hgrid, 
-			   double _mass) :
+			     GridCartesian         &Fgrid,
+			     GridRedBlackCartesian &Hgrid, 
+			     RealD _mass) :
   _grid(&Fgrid),
   _cbgrid(&Hgrid),
   Stencil    (&Fgrid,npoint,Even,directions,displacements),
diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h
index 5c208131..6040d328 100644
--- a/lib/qcd/action/fermion/WilsonFermion.h
+++ b/lib/qcd/action/fermion/WilsonFermion.h
@@ -5,7 +5,7 @@ namespace Grid {
 
   namespace QCD {
 
-    class WilsonFermion : public FermionAction<LatticeFermion,LatticeGaugeField>
+    class WilsonFermion : public FermionOperator<LatticeFermion,LatticeGaugeField>
     {
     public:
 
@@ -44,7 +44,7 @@ namespace Grid {
 			int dag);
 
       // Constructor
-      WilsonFermion(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,double _mass);
+      WilsonFermion(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,RealD _mass);
 
       // DoubleStore
       void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
@@ -57,7 +57,7 @@ namespace Grid {
 
     protected:
 
-      double                        mass;
+      RealD                        mass;
 
       GridBase                     *    _grid; 
       GridBase                     *  _cbgrid;
diff --git a/lib/qcd/action/fermion/FiveDimWilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc
similarity index 68%
rename from lib/qcd/action/fermion/FiveDimWilsonFermion.cc
rename to lib/qcd/action/fermion/WilsonFermion5D.cc
index 43645899..d22701b0 100644
--- a/lib/qcd/action/fermion/FiveDimWilsonFermion.cc
+++ b/lib/qcd/action/fermion/WilsonFermion5D.cc
@@ -4,18 +4,18 @@ namespace Grid {
 namespace QCD {
   
   // S-direction is INNERMOST and takes no part in the parity.
-  const std::vector<int> FiveDimWilsonFermion::directions   ({1,2,3,4, 1, 2, 3, 4});
-  const std::vector<int> FiveDimWilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1});
+  const std::vector<int> WilsonFermion5D::directions   ({1,2,3,4, 1, 2, 3, 4});
+  const std::vector<int> WilsonFermion5D::displacements({1,1,1,1,-1,-1,-1,-1});
 
-  int FiveDimWilsonFermion::HandOptDslash;
+  int WilsonFermion5D::HandOptDslash;
 
   // 5d lattice for DWF.
-  FiveDimWilsonFermion::FiveDimWilsonFermion(LatticeGaugeField &_Umu,
+  WilsonFermion5D::WilsonFermion5D(LatticeGaugeField &_Umu,
 					   GridCartesian         &FiveDimGrid,
 					   GridRedBlackCartesian &FiveDimRedBlackGrid,
 					   GridCartesian         &FourDimGrid,
 					   GridRedBlackCartesian &FourDimRedBlackGrid,
-					   double _mass) :
+					   RealD _M5) :
   _FiveDimGrid(&FiveDimGrid),
   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
   _FourDimGrid(&FourDimGrid),
@@ -23,7 +23,7 @@ namespace QCD {
   Stencil    (_FiveDimGrid,npoint,Even,directions,displacements),
   StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
   StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
-  mass(_mass),
+  M5(_M5),
   Umu(_FourDimGrid),
   UmuEven(_FourDimRedBlackGrid),
   UmuOdd (_FourDimRedBlackGrid),
@@ -70,7 +70,7 @@ namespace QCD {
   pickCheckerboard(Even,UmuEven,Umu);
   pickCheckerboard(Odd ,UmuOdd,Umu);
 }
-void FiveDimWilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
+void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
 {
   conformable(Uds._grid,GaugeGrid());
   conformable(Umu._grid,GaugeGrid());
@@ -82,60 +82,9 @@ void FiveDimWilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const Latti
     pokeIndex<LorentzIndex>(Uds,U,mu+4);
   }
 }
-
-RealD FiveDimWilsonFermion::M(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard=in.checkerboard;
-  Dhop(in,out,DaggerNo);
-  return axpy_norm(out,5.0-M5,in,out);
-}
-RealD FiveDimWilsonFermion::Mdag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard=in.checkerboard;
-  Dhop(in,out,DaggerYes);
-  return axpy_norm(out,5.0-M5,in,out);
-}
-void FiveDimWilsonFermion::Meooe(const LatticeFermion &in, LatticeFermion &out)
-{
-  if ( in.checkerboard == Odd ) {
-    DhopEO(in,out,DaggerNo);
-  } else {
-    DhopOE(in,out,DaggerNo);
-  }
-}
-void FiveDimWilsonFermion::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  if ( in.checkerboard == Odd ) {
-    DhopEO(in,out,DaggerYes);
-  } else {
-    DhopOE(in,out,DaggerYes);
-  }
-}
-void FiveDimWilsonFermion::Mooee(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  out = (5.0-M5)*in;
-  return ;
-}
-void FiveDimWilsonFermion::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  Mooee(in,out);
-}
-void FiveDimWilsonFermion::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  out = (1.0/(5.0-M5))*in;
-  return ;
-}
-void FiveDimWilsonFermion::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  MooeeInv(in,out);
-}
-void FiveDimWilsonFermion::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
-					LatticeDoubledGaugeField & U,
-					const LatticeFermion &in, LatticeFermion &out,int dag)
+void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
+				   LatticeDoubledGaugeField & U,
+				   const LatticeFermion &in, LatticeFermion &out,int dag)
 {
   assert((dag==DaggerNo) ||(dag==DaggerYes));
 
@@ -150,19 +99,21 @@ void FiveDimWilsonFermion::DhopInternal(CartesianStencil & st, LebesgueOrder &lo
   // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
   if ( dag == DaggerYes ) {
     if( HandOptDslash ) {
-      for(int ss=0;ss<U._grid->oSites();ss++){
-	int sU=lo.Reorder(ss);
 PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
 	for(int s=0;s<Ls;s++){
+	  //int sU=lo.Reorder(ss);
+	  int sU=ss;
 	  int sF = s+Ls*sU;
 	  DiracOptHand::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 	}
       }
     } else { 
-      for(int ss=0;ss<U._grid->oSites();ss++){
-	int sU=lo.Reorder(ss);
 PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
 	for(int s=0;s<Ls;s++){
+	  //	  int sU=lo.Reorder(ss);
+	  int sU=ss;
 	  int sF = s+Ls*sU;
 	  DiracOpt::DhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 	}
@@ -170,21 +121,22 @@ PARALLEL_FOR_LOOP
     }
   } else {
     if( HandOptDslash ) {
-
 PARALLEL_FOR_LOOP
       for(int ss=0;ss<U._grid->oSites();ss++){
-	int sU=lo.Reorder(ss);
 	for(int s=0;s<Ls;s++){
+	  //	  int sU=lo.Reorder(ss);
+	  int sU=ss;
 	  int sF = s+Ls*sU;
 	  DiracOptHand::DhopSite(st,U,comm_buf,sF,sU,in,out);
 	}
       }
 
     } else { 
-      for(int ss=0;ss<U._grid->oSites();ss++){
-	int sU=lo.Reorder(ss);
 PARALLEL_FOR_LOOP
+      for(int ss=0;ss<U._grid->oSites();ss++){
 	for(int s=0;s<Ls;s++){
+	  //	  int sU=lo.Reorder(ss);
+	  int sU=ss;
 	  int sF = s+Ls*sU; 
 	  DiracOpt::DhopSite(st,U,comm_buf,sF,sU,in,out);
 	}
@@ -192,7 +144,7 @@ PARALLEL_FOR_LOOP
     }
   }
 }
-void FiveDimWilsonFermion::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
+void WilsonFermion5D::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
 {
   conformable(in._grid,FermionRedBlackGrid());    // verifies half grid
   conformable(in._grid,out._grid); // drops the cb check
@@ -202,7 +154,7 @@ void FiveDimWilsonFermion::DhopOE(const LatticeFermion &in, LatticeFermion &out,
 
   DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
 }
-void FiveDimWilsonFermion::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
+void WilsonFermion5D::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
 {
   conformable(in._grid,FermionRedBlackGrid());    // verifies half grid
   conformable(in._grid,out._grid); // drops the cb check
@@ -212,7 +164,7 @@ void FiveDimWilsonFermion::DhopEO(const LatticeFermion &in, LatticeFermion &out,
 
   DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
 }
-void FiveDimWilsonFermion::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
+void WilsonFermion5D::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
 {
   conformable(in._grid,FermionGrid()); // verifies full grid
   conformable(in._grid,out._grid);
@@ -221,8 +173,14 @@ void FiveDimWilsonFermion::Dhop(const LatticeFermion &in, LatticeFermion &out,in
 
   DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
 }
-
-}}
+void WilsonFermion5D::DW(const LatticeFermion &in, LatticeFermion &out,int dag)
+{
+  out.checkerboard=in.checkerboard; // result carries the same checkerboard tag as the input
+  Dhop(in,out,dag); // hopping term written into out (dag is forwarded unchanged); -0.5 is included
+  axpy(out,4.0-M5,in,out); // presumably out = (4.0-M5)*in + out -- TODO confirm axpy argument convention
+}
+}
+}
 
 
 
diff --git a/lib/qcd/action/fermion/FiveDimWilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion5D.h
similarity index 72%
rename from lib/qcd/action/fermion/FiveDimWilsonFermion.h
rename to lib/qcd/action/fermion/WilsonFermion5D.h
index 3aa85023..d4777d01 100644
--- a/lib/qcd/action/fermion/FiveDimWilsonFermion.h
+++ b/lib/qcd/action/fermion/WilsonFermion5D.h
@@ -15,7 +15,7 @@ namespace Grid {
     //
     // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
     ////////////////////////////////////////////////////////////////////////////////
-    class FiveDimWilsonFermion : public FermionAction<LatticeFermion,LatticeGaugeField>
+    class WilsonFermion5D : public FermionOperator<LatticeFermion,LatticeGaugeField>
     {
     public:
       ///////////////////////////////////////////////////////////////
@@ -26,19 +26,21 @@ namespace Grid {
       GridBase *FermionGrid(void)            { return _FiveDimGrid;}
       GridBase *FermionRedBlackGrid(void)    { return _FiveDimRedBlackGrid;}
 
-      // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+      // full checkerboard operations; leave unimplemented as abstract for now
+      //virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out)=0;
+      //virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out)=0;
 
-      // half checkerboard operaions
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+      // half checkerboard operations; leave unimplemented as abstract for now
+      //      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out)=0;
+      //      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out)=0;
+      //      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out)=0;
+      //      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out)=0;
+      //      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out)=0;
+      //      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out)=0;
 
-      // non-hermitian hopping term; half cb or both
+      // Implement the non-hermitian hopping term; half cb or both
+      // Implement s-diagonal DW
+      void DW    (const LatticeFermion &in, LatticeFermion &out,int dag);
       void Dhop  (const LatticeFermion &in, LatticeFermion &out,int dag);
       void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
       void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
@@ -54,12 +56,12 @@ namespace Grid {
 			int dag);
 
       // Constructors
-      FiveDimWilsonFermion(LatticeGaugeField &_Umu,
+      WilsonFermion5D(LatticeGaugeField &_Umu,
 			  GridCartesian         &FiveDimGrid,
 			  GridRedBlackCartesian &FiveDimRedBlackGrid,
 			  GridCartesian         &FourDimGrid,
 			  GridRedBlackCartesian &FourDimRedBlackGrid,
-			  double _mass);
+			  double _M5);
 
       // DoubleStore
       void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
@@ -82,7 +84,6 @@ namespace Grid {
       static const std::vector<int> displacements;
 
       double                        M5;
-      double                        mass;
       int Ls;
 
       //Defines the stencils for even and odd
diff --git a/lib/stencil/Grid_stencil_common.cc b/lib/stencil/Grid_stencil_common.cc
index f0f8c581..7f894faf 100644
--- a/lib/stencil/Grid_stencil_common.cc
+++ b/lib/stencil/Grid_stencil_common.cc
@@ -52,8 +52,8 @@ namespace Grid {
 	// up a table containing the npoint "neighbours" and whether they 
 	// live in lattice or a comms buffer.
 	if ( !comm_dim ) {
-	  sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Even);
-	  sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Odd);
+	  sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
+	  sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
 
 	  if ( sshift[0] == sshift[1] ) {
 	    Local(point,dimension,shift,0x3);
@@ -63,8 +63,8 @@ namespace Grid {
 	  }
 	} else { // All permute extract done in comms phase prior to Stencil application
 	  //        So tables are the same whether comm_dim or splice_dim
-	  sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Even);
-	  sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Odd);
+	  sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
+	  sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
 	  if ( sshift[0] == sshift[1] ) {
 	    Comms(point,dimension,shift,0x3);
 	  } else {
@@ -96,7 +96,7 @@ namespace Grid {
 	
 	int cb= (cbmask==0x2)? Odd : Even;
 	  
-	int sshift = _grid->CheckerBoardShift(_checkerboard,dimension,shift,cb);
+	int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
 	int sx     = (x+sshift)%rd;
 	  
 	int permute_slice=0;
@@ -134,7 +134,7 @@ namespace Grid {
                                            // send to one or more remote nodes.
 
       int cb= (cbmask==0x2)? Odd : Even;
-      int sshift= _grid->CheckerBoardShift(_checkerboard,dimension,shift,cb);
+      int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
       
       for(int x=0;x<rd;x++){       
 	

From 35fdba81dd5bf8546b9d93dcba869a7ba1ed915a Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Tue, 2 Jun 2015 17:25:26 +0100
Subject: [PATCH 09/20] Reorg; moving prec/unprec/schur CG for Wilson and DWF
 into tests as these are really tests and not benchmarks (no performance
 reports, only convergence test).

---
 benchmarks/Makefile.am                        | 39 ++-------------
 {benchmarks => tests}/Grid_dwf_cg_prec.cc     |  0
 {benchmarks => tests}/Grid_dwf_cg_schur.cc    |  0
 {benchmarks => tests}/Grid_dwf_cg_unprec.cc   |  0
 {benchmarks => tests}/Grid_dwf_even_odd.cc    |  0
 {benchmarks => tests}/Grid_wilson_cg_prec.cc  |  0
 {benchmarks => tests}/Grid_wilson_cg_schur.cc |  0
 .../Grid_wilson_cg_unprec.cc                  |  0
 {benchmarks => tests}/Grid_wilson_evenodd.cc  |  0
 tests/Makefile.am                             | 50 ++++++++++++++++++-
 10 files changed, 52 insertions(+), 37 deletions(-)
 rename {benchmarks => tests}/Grid_dwf_cg_prec.cc (100%)
 rename {benchmarks => tests}/Grid_dwf_cg_schur.cc (100%)
 rename {benchmarks => tests}/Grid_dwf_cg_unprec.cc (100%)
 rename {benchmarks => tests}/Grid_dwf_even_odd.cc (100%)
 rename {benchmarks => tests}/Grid_wilson_cg_prec.cc (100%)
 rename {benchmarks => tests}/Grid_wilson_cg_schur.cc (100%)
 rename {benchmarks => tests}/Grid_wilson_cg_unprec.cc (100%)
 rename {benchmarks => tests}/Grid_wilson_evenodd.cc (100%)

diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index e92f2e84..e1e092f4 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -10,53 +10,20 @@ bin_PROGRAMS = \
 	Grid_memory_bandwidth \
 	Grid_su3 \
 	Grid_wilson \
-	Grid_wilson_evenodd \
-	Grid_wilson_cg_unprec \
-	Grid_wilson_cg_prec \
-	Grid_wilson_cg_schur \
-	Grid_dwf\
-	Grid_dwf_even_odd\
-	Grid_dwf_cg_unprec\
-	Grid_dwf_cg_prec\
-	Grid_dwf_cg_schur
+	Grid_dwf
 
 Grid_comms_SOURCES = Grid_comms.cc
 Grid_comms_LDADD = -lGrid
 
-Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
-Grid_su3_LDADD = -lGrid
-
 Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
 Grid_memory_bandwidth_LDADD = -lGrid
 
+Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
+Grid_su3_LDADD = -lGrid
 
 Grid_wilson_SOURCES = Grid_wilson.cc
 Grid_wilson_LDADD = -lGrid
 
-Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
-Grid_wilson_evenodd_LDADD = -lGrid
-
-Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
-Grid_wilson_cg_unprec_LDADD = -lGrid
-
-Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc
-Grid_wilson_cg_prec_LDADD = -lGrid
-
-Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc
-Grid_wilson_cg_schur_LDADD = -lGrid
-
 Grid_dwf_SOURCES = Grid_dwf.cc
 Grid_dwf_LDADD = -lGrid
 
-Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc
-Grid_dwf_even_odd_LDADD = -lGrid
-
-Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc
-Grid_dwf_cg_unprec_LDADD = -lGrid
-
-Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc
-Grid_dwf_cg_prec_LDADD = -lGrid
-
-Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc
-Grid_dwf_cg_schur_LDADD = -lGrid
-
diff --git a/benchmarks/Grid_dwf_cg_prec.cc b/tests/Grid_dwf_cg_prec.cc
similarity index 100%
rename from benchmarks/Grid_dwf_cg_prec.cc
rename to tests/Grid_dwf_cg_prec.cc
diff --git a/benchmarks/Grid_dwf_cg_schur.cc b/tests/Grid_dwf_cg_schur.cc
similarity index 100%
rename from benchmarks/Grid_dwf_cg_schur.cc
rename to tests/Grid_dwf_cg_schur.cc
diff --git a/benchmarks/Grid_dwf_cg_unprec.cc b/tests/Grid_dwf_cg_unprec.cc
similarity index 100%
rename from benchmarks/Grid_dwf_cg_unprec.cc
rename to tests/Grid_dwf_cg_unprec.cc
diff --git a/benchmarks/Grid_dwf_even_odd.cc b/tests/Grid_dwf_even_odd.cc
similarity index 100%
rename from benchmarks/Grid_dwf_even_odd.cc
rename to tests/Grid_dwf_even_odd.cc
diff --git a/benchmarks/Grid_wilson_cg_prec.cc b/tests/Grid_wilson_cg_prec.cc
similarity index 100%
rename from benchmarks/Grid_wilson_cg_prec.cc
rename to tests/Grid_wilson_cg_prec.cc
diff --git a/benchmarks/Grid_wilson_cg_schur.cc b/tests/Grid_wilson_cg_schur.cc
similarity index 100%
rename from benchmarks/Grid_wilson_cg_schur.cc
rename to tests/Grid_wilson_cg_schur.cc
diff --git a/benchmarks/Grid_wilson_cg_unprec.cc b/tests/Grid_wilson_cg_unprec.cc
similarity index 100%
rename from benchmarks/Grid_wilson_cg_unprec.cc
rename to tests/Grid_wilson_cg_unprec.cc
diff --git a/benchmarks/Grid_wilson_evenodd.cc b/tests/Grid_wilson_evenodd.cc
similarity index 100%
rename from benchmarks/Grid_wilson_evenodd.cc
rename to tests/Grid_wilson_evenodd.cc
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 7ef4eb1f..6f92a94d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,7 +5,30 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 #
 # Test code
 #
-bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma  Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_cshift_red_black 
+bin_PROGRAMS = Grid_main \
+	Grid_simd \
+	Grid_gamma  \
+	Grid_cshift \
+	Grid_cshift_red_black \
+	Grid_stencil \
+	Grid_nersc_io \
+	Grid_rng \
+	Grid_remez \
+	Grid_rng_fixed \
+	Grid_wilson_evenodd \
+	Grid_wilson_cg_unprec \
+	Grid_wilson_cg_prec \
+	Grid_wilson_cg_schur \
+	Grid_dwf_even_odd\
+	Grid_dwf_cg_unprec\
+	Grid_dwf_cg_prec\
+	Grid_dwf_cg_schur
+
+# Run every test binary in this directory, capturing its stdout in <name>.log
+test:
+	for f in $(bin_PROGRAMS) ; do \
+	  ./$$f > $$f.log ; \
+	done
 
 Grid_main_SOURCES = Grid_main.cc
 Grid_main_LDADD = -lGrid
@@ -39,3 +62,28 @@ Grid_simd_LDADD = -lGrid
 
 #Grid_simd_new_SOURCES = Grid_simd_new.cc
 #Grid_simd_new_LDADD = -lGrid
+
+Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
+Grid_wilson_evenodd_LDADD = -lGrid
+
+Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
+Grid_wilson_cg_unprec_LDADD = -lGrid
+
+Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc
+Grid_wilson_cg_prec_LDADD = -lGrid
+
+Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc
+Grid_wilson_cg_schur_LDADD = -lGrid
+
+Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc
+Grid_dwf_even_odd_LDADD = -lGrid
+
+Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc
+Grid_dwf_cg_unprec_LDADD = -lGrid
+
+Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc
+Grid_dwf_cg_prec_LDADD = -lGrid
+
+Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc
+Grid_dwf_cg_schur_LDADD = -lGrid
+

From 59163862420a81a5e9d41bd11eae2c5c329e96b9 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 09:36:26 +0100
Subject: [PATCH 10/20] Mobius Cayley form, Mobius Zolotarev operators. Pass
 Even Odd vs unprec test and hermiticity checks in tests/Grid_any_evenodd.cc;
 will work on inversion tests shortly.

---
 lib/qcd/action/Actions.h                      |   8 +-
 lib/qcd/action/fermion/CayleyFermion5D.cc     | 110 ++++++++-
 lib/qcd/action/fermion/CayleyFermion5D.h      |   6 +-
 lib/qcd/action/fermion/DomainWallFermion.h    |  83 +------
 lib/qcd/action/fermion/MobiusFermion.h        |  46 ++++
 .../action/fermion/MobiusZolotarevFermion.h   |  48 ++++
 lib/qcd/action/fermion/ScaledShamir.h         |  51 ++++
 lib/qcd/action/fermion/WilsonFermion5D.h      |  12 +
 tests/Grid_any_evenodd.cc                     | 226 ++++++++++++++++++
 tests/Makefile.am                             |  12 +-
 10 files changed, 507 insertions(+), 95 deletions(-)
 create mode 100644 lib/qcd/action/fermion/MobiusFermion.h
 create mode 100644 lib/qcd/action/fermion/MobiusZolotarevFermion.h
 create mode 100644 lib/qcd/action/fermion/ScaledShamir.h
 create mode 100644 tests/Grid_any_evenodd.cc

diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index acbf027c..d37d1cd4 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -38,6 +38,9 @@
 //#include <qcd/action/fermion/PartialFraction.h>
 
 #include <qcd/action/fermion/DomainWallFermion.h>
+#include <qcd/action/fermion/DomainWallFermion.h>
+#include <qcd/action/fermion/MobiusFermion.h>
+#include <qcd/action/fermion/MobiusZolotarevFermion.h>
 //#include <qcd/action/fermion/ScaledShamirCayleyTanh.h>
 
 
@@ -70,11 +73,6 @@
 
   class LinearGaugeAction : public GaugeAction< multi1d<LatticeColorMatrix>, multi1d<LatticeColorMatrix> >
   typedef multi1d<LatticeColorMatrix>  P;
-  typedef multi1d<LatticeColorMatrix>  Q;
-  virtual void staple(LatticeColorMatrix& result,
-		      const Handle< GaugeState<P,Q> >& state,
-		      int mu, int cb) const = 0;
     */
 
-
 #endif
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc
index 263cc28b..be528e79 100644
--- a/lib/qcd/action/fermion/CayleyFermion5D.cc
+++ b/lib/qcd/action/fermion/CayleyFermion5D.cc
@@ -15,7 +15,6 @@ namespace QCD {
 		   FourDimRedBlackGrid,_M5),
    mass(_mass)
  {
-   std::cout << "Constructing a CayleyFermion5D"<<std::endl;
  }
 
   // override multiply
@@ -229,7 +228,112 @@ namespace QCD {
       axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);  // chi[Ls]
     }
   }
+  
+  void CayleyFermion5D::SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c)
+  {
+    // Fills the per-slice tables omega/bs/cs/as, bee/cee/beo/ceo, aee/aeo and the LDU factors dee/lee/leem/uee/ueem (all resized to Ls) from zdata->gamma[0..Ls-1], b, c, M5 and mass.
+    ///////////////////////////////////////////////////////////
+    // The Cayley coeffs (unprec)
+    ///////////////////////////////////////////////////////////
+    omega.resize(Ls);
+    bs.resize(Ls);
+    cs.resize(Ls);
+    as.resize(Ls);
+    
+    // 
+    // Ts = (    [bs+cs]Dw        )^-1 (    (bs+cs) Dw         )
+    //     -(g5  -------       -1 )    ( g5 ---------     + 1  )
+    //      (   {2+(bs-cs)Dw}     )    (    2+(bs-cs) Dw       )
+    //
+    //  bs = 1/2( (1/omega_s + 1)*b + (1/omega_s - 1)*c ) = 1/2(  1/omega_s(b+c) + (b-c) )
+    //  cs = 1/2( (1/omega_s - 1)*b + (1/omega_s + 1)*c ) = 1/2(  1/omega_s(b+c) - (b-c) )
+    //
+    // bs+cs = 0.5*( 1/omega_s(b+c) + (b-c) + 1/omega_s(b+c) - (b-c) ) = 1/omega_s(b+c)
+    // bs-cs = 0.5*( 1/omega_s(b+c) + (b-c) - 1/omega_s(b+c) + (b-c) ) = b-c
+    //
+    // So 
+    //
+    // Ts = (    [b+c]Dw/omega_s    )^-1 (    (b+c) Dw /omega_s        )
+    //     -(g5  -------         -1 )    ( g5 ---------           + 1  )
+    //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               )
+    //
+    // Ts = (    [b+c]Dw            )^-1 (    (b+c) Dw                 )
+    //     -(g5  -------    -omega_s)    ( g5 ---------      + omega_s )
+    //      (   {2+(b-c)Dw}         )    (    2+(b-c) Dw               )
+    // 
+    // NOTE(review): the 'scale' argument is not referenced anywhere in this function body -- confirm whether it is meant to be applied.
+    double bpc = b+c;
+    double bmc = b-c;
+    for(int i=0; i < Ls; i++){
+      as[i] = 1.0;
+      omega[i] = ((double)zdata->gamma[i]); //NB reciprocal relative to Chroma NEF code
+      bs[i] = 0.5*(bpc/omega[i] + bmc);
+      cs[i] = 0.5*(bpc/omega[i] - bmc);
+    }
+
+    ////////////////////////////////////////////////////////
+    // Constants for the preconditioned matrix Cayley form
+    ////////////////////////////////////////////////////////
+    bee.resize(Ls);
+    cee.resize(Ls);
+    beo.resize(Ls);
+    ceo.resize(Ls);
+    
+    for(int i=0;i<Ls;i++){
+      bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0);
+      cee[i]=as[i]*(1.0-cs[i]*(4.0-M5));
+      beo[i]=as[i]*bs[i];
+      ceo[i]=-as[i]*cs[i];
+    }
+
+    aee.resize(Ls);
+    aeo.resize(Ls);
+    for(int i=0;i<Ls;i++){
+      aee[i]=cee[i];
+      aeo[i]=ceo[i];
+    }
+
+    //////////////////////////////////////////
+    // LDU decomposition of eeoo
+    //////////////////////////////////////////
+    dee.resize(Ls);
+    lee.resize(Ls);
+    leem.resize(Ls);
+    uee.resize(Ls);
+    ueem.resize(Ls);
+    
+    for(int i=0;i<Ls;i++){
+      
+      dee[i] = bee[i];
+      
+      if ( i < Ls-1 ) {
+	
+	lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
+	    
+	leem[i]=mass*cee[Ls-1]/bee[0];
+	for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1];
+	
+	uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row
+	
+	ueem[i]=mass;
+	for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
+	ueem[i]*= aee[0]/bee[0];
+	    
+      } else { 
+	lee[i] =0.0;
+	leem[i]=0.0;
+	uee[i] =0.0;
+	ueem[i]=0.0;
+      }
+    }
+	
+    { 
+      double delta_d=mass*cee[Ls-1];
+      for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
+      dee[Ls-1] += delta_d;
+    }
+  }
+
+}}
 
-}
-}
 
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h
index 857ac124..57c71992 100644
--- a/lib/qcd/action/fermion/CayleyFermion5D.h
+++ b/lib/qcd/action/fermion/CayleyFermion5D.h
@@ -22,10 +22,8 @@ namespace Grid {
       virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
 
       //    protected:
-
-      Approx::zolotarev_data *zdata;
-
       RealD mass;
+
       // Cayley form Moebius (tanh and zolotarev)
       std::vector<RealD> omega; 
       std::vector<RealD> bs;    // S dependent coeffs
@@ -53,6 +51,8 @@ namespace Grid {
 		      GridRedBlackCartesian &FourDimRedBlackGrid,
 		      RealD _mass,RealD _M5);
 
+    protected:
+      void SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c);
     };
 
   }
diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h
index 2abb6eb2..3e6a9739 100644
--- a/lib/qcd/action/fermion/DomainWallFermion.h
+++ b/lib/qcd/action/fermion/DomainWallFermion.h
@@ -28,86 +28,13 @@ namespace Grid {
       {
 	RealD eps = 1.0;
 
-	zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
+	Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
 	assert(zdata->n==this->Ls);
+	
+	std::cout << "DomainWallFermion with Ls="<<Ls<<std::endl;
+	// Call base setter
+	this->CayleyFermion5D::SetCoefficients(1.0,zdata,1.0,0.0);
  
-	///////////////////////////////////////////////////////////
-	// The Cayley coeffs (unprec)
-	///////////////////////////////////////////////////////////
-	this->omega.resize(this->Ls);
-	this->bs.resize(this->Ls);
-	this->cs.resize(this->Ls);
-	this->as.resize(this->Ls);
-	
-	for(int i=0; i < this->Ls; i++){
-	  this->as[i] = 1.0;
-	  this->omega[i] = ((double)zdata -> gamma[i]);
-	  double bb=1.0;
-	  this->bs[i] = 0.5*(bb/(this->omega[i]) + 1.0);
-	  this->cs[i] = 0.5*(bb/(this->omega[i]) - 1.0);
-	}
-
-	////////////////////////////////////////////////////////
-	// Constants for the preconditioned matrix Cayley form
-	////////////////////////////////////////////////////////
-	this->bee.resize(this->Ls);
-	this->cee.resize(this->Ls);
-	this->beo.resize(this->Ls);
-	this->ceo.resize(this->Ls);
-
-	for(int i=0;i<this->Ls;i++){
-	  this->bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0);
-	  this->cee[i]=as[i]*(1.0-cs[i]*(4.0-M5));
-	  this->beo[i]=as[i]*bs[i];
-	  this->ceo[i]=-as[i]*cs[i];
-	}
-
-	aee.resize(this->Ls);
-	aeo.resize(this->Ls);
-	for(int i=0;i<this->Ls;i++){
-	  aee[i]=cee[i];
-	  aeo[i]=ceo[i];
-	}
-
-	//////////////////////////////////////////
-	// LDU decomposition of eeoo
-	//////////////////////////////////////////
-	dee.resize(this->Ls);
-	lee.resize(this->Ls);
-	leem.resize(this->Ls);
-	uee.resize(this->Ls);
-	ueem.resize(this->Ls);
-
-	for(int i=0;i<this->Ls;i++){
-	  
-	  dee[i] = bee[i];
-	  
-	  if ( i < this->Ls-1 ) {
-
-	    lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
-	    
-	    leem[i]=this->mass*cee[this->Ls-1]/bee[0];
-	    for(int j=0;j<i;j++)  leem[i]*= aee[j]/bee[j+1];
-	    
-	    uee[i] =-aee[i]/bee[i];   // up-diag entry on the ith row
-	    
-	    ueem[i]=this->mass;
-	    for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
-	    ueem[i]*= aee[0]/bee[0];
-	    
-	  } else { 
-	    lee[i] =0.0;
-	    leem[i]=0.0;
-	    uee[i] =0.0;
-	    ueem[i]=0.0;
-	  }
-	}
-	
-	{ 
-	  double delta_d=mass*cee[this->Ls-1];
-	  for(int j=0;j<this->Ls-1;j++) delta_d *= cee[j]/bee[j];
-	  dee[this->Ls-1] += delta_d;
-	}
       }
 
     };
diff --git a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h
new file mode 100644
index 00000000..4c291fad
--- /dev/null
+++ b/lib/qcd/action/fermion/MobiusFermion.h
@@ -0,0 +1,46 @@
+#ifndef  GRID_QCD_MOBIUS_FERMION_H
+#define  GRID_QCD_MOBIUS_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class MobiusFermion : public CayleyFermion5D
+    {
+    public:
+      // Constructor: forwards the gauge field, the four grids, _mass and _M5 to CayleyFermion5D,
+      // then sets the Cayley coefficients from Approx::grid_higham data and the caller's b and c.
+      MobiusFermion(LatticeGaugeField &_Umu,
+		    GridCartesian         &FiveDimGrid,
+		    GridRedBlackCartesian &FiveDimRedBlackGrid,
+		    GridCartesian         &FourDimGrid,
+		    GridRedBlackCartesian &FourDimRedBlackGrid,
+		    RealD _mass,RealD _M5,
+		    RealD b, RealD c) : 
+      
+      CayleyFermion5D(_Umu,
+		      FiveDimGrid,
+		      FiveDimRedBlackGrid,
+		      FourDimGrid,
+		      FourDimRedBlackGrid,_mass,_M5)
+
+      {
+	RealD eps = 1.0;
+
+	std::cout << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Tanh approx"<<std::endl;
+	Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
+	assert(zdata->n==this->Ls);
+	
+	// Call base setter; NOTE(review): zdata is not released in this constructor -- confirm ownership
+	this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c);
+ 
+      }
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
new file mode 100644
index 00000000..866d0c39
--- /dev/null
+++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
@@ -0,0 +1,48 @@
+#ifndef  GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
+#define  GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class MobiusZolotarevFermion : public CayleyFermion5D
+    {
+    public:
+      // Constructor: forwards the gauge field, the four grids, _mass and _M5 to CayleyFermion5D,
+      // then sets the Cayley coefficients from Approx::grid_zolotarev data (eps = lo/hi) and b, c.
+       MobiusZolotarevFermion(LatticeGaugeField &_Umu,
+			      GridCartesian         &FiveDimGrid,
+			      GridRedBlackCartesian &FiveDimRedBlackGrid,
+			      GridCartesian         &FourDimGrid,
+			      GridRedBlackCartesian &FourDimRedBlackGrid,
+			      RealD _mass,RealD _M5,
+			      RealD b, RealD c,
+			      RealD lo, RealD hi) : 
+      
+      CayleyFermion5D(_Umu,
+		      FiveDimGrid,
+		      FiveDimRedBlackGrid,
+		      FourDimGrid,
+		      FourDimRedBlackGrid,_mass,_M5)
+
+      {
+	RealD eps = lo/hi;
+
+	Approx::zolotarev_data *zdata = Approx::grid_zolotarev(eps,this->Ls,0);// eps = lo/hi is passed to the Zolotarev construction
+	assert(zdata->n==this->Ls);
+
+	std::cout << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
+	
+	// Call base setter; NOTE(review): zdata is not released in this constructor -- confirm ownership
+	this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c);
+ 
+      }
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/ScaledShamir.h b/lib/qcd/action/fermion/ScaledShamir.h
new file mode 100644
index 00000000..a1fd33d0
--- /dev/null
+++ b/lib/qcd/action/fermion/ScaledShamir.h
@@ -0,0 +1,51 @@
+#ifndef  GRID_QCD_DOMAIN_WALL_FERMION_H
+#define  GRID_QCD_DOMAIN_WALL_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class ScaledShamirFermion : public CayleyFermion5D
+    {
+    public:
+
+      // Constructors
+      ScaledShamirFermion(LatticeGaugeField &_Umu,
+			  GridCartesian         &FiveDimGrid,
+			  GridRedBlackCartesian &FiveDimRedBlackGrid,
+			  GridCartesian         &FourDimGrid,
+			  GridRedBlackCartesian &FourDimRedBlackGrid,
+			  RealD _mass,RealD _M5, RealD scale) : 
+      
+      CayleyFermion5D(_Umu,
+		      FiveDimGrid,
+		      FiveDimRedBlackGrid,
+		      FourDimGrid,
+		      FourDimRedBlackGrid,_mass,_M5,
+		      RealD b, 
+		      RealD c)
+
+      {
+	RealD eps = 1.0;
+
+	Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
+	assert(zdata->n==this->Ls);
+	
+	//b+c = scale;
+	//b-c = 1
+	//b   = 0.5(scale+1);
+	//c   = 0.5(scale-1);
+	
+	// Call base setter
+	this->CayleyFermion5D::SetCoefficients(1.0,zdata,0.5*(scale+1.0),0.5*(scale-1.0));
+
+       }
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h
index d4777d01..062c4d82 100644
--- a/lib/qcd/action/fermion/WilsonFermion5D.h
+++ b/lib/qcd/action/fermion/WilsonFermion5D.h
@@ -14,6 +14,18 @@ namespace Grid {
     // i.e. even even contains fifth dim hopping term.
     //
     // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
+    ////////////////////////////
+    //ContFrac:
+    //  Ls always odd. Rational poly deg is either Ls or Ls-1
+    //PartFrac 
+    //  Ls always odd. Rational poly deg is either Ls or Ls-1
+    //
+    //Cayley: Ls always even, Rational poly deg is Ls
+    // 
+    // Just set nrational as Ls. Forget about Ls-1 cases.
+    //
+    // Require odd Ls for cont and part frac
+    ////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////
     class WilsonFermion5D : public FermionOperator<LatticeFermion,LatticeGaugeField>
     {
diff --git a/tests/Grid_any_evenodd.cc b/tests/Grid_any_evenodd.cc
new file mode 100644
index 00000000..8d8580c1
--- /dev/null
+++ b/tests/Grid_any_evenodd.cc
@@ -0,0 +1,226 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+
+template<class What> 
+void  TestWhat(What & Ddwf,
+	       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+	       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+	       RealD mass, RealD M5,
+	       GridParallelRNG *RNG4,   GridParallelRNG *RNG5);
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+  // Report the thread count obtained from GridThread.
+  int threads = GridThread::GetThreads();
+  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+  // Build the 4d grid and its red-black partner, then the 5d grids from Ls and the 4d grid.
+  const int Ls=8;
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  // Seed the 5d and 4d parallel RNGs with fixed integer lists.
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+  // Random gauge field on the 4d grid; U is declared but not used anywhere in main.
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+  // mass and M5 are shared by every operator constructed below; each operator is passed to TestWhat.
+  RealD mass=0.1;
+  RealD M5  =1.8;
+  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  TestWhat<DomainWallFermion>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  // b and c below are reused for both the Mobius and the Mobius-Zolotarev operators.
+  RealD b=1.5;// Scale factor b+c=2, b-c=1
+  RealD c=0.5;
+  MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
+  TestWhat<MobiusFermion>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,4.0);
+  TestWhat<MobiusZolotarevFermion>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+
+  Grid_finalize();
+}
+
+template<class What> 
+void  TestWhat(What & Ddwf, 
+	       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+	       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+	       RealD mass, RealD M5,
+	       GridParallelRNG *RNG4,
+	       GridParallelRNG *RNG5)
+{
+  // Runs five printed consistency checks on Ddwf; results go to std::cout and nothing is asserted.
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion phi   (FGrid); random(*RNG5,phi);
+  LatticeFermion chi   (FGrid); random(*RNG5,chi);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeFermion    ref(FGrid);    ref=zero;
+  LatticeFermion    tmp(FGrid);    tmp=zero;
+  LatticeFermion    err(FGrid);    err=zero;
+  // Only FGrid, FrbGrid and RNG5 are used in this body; the other grid/mass/RNG4 arguments are not read.
+  LatticeFermion src_e (FrbGrid);
+  LatticeFermion src_o (FrbGrid);
+  LatticeFermion r_e   (FrbGrid);
+  LatticeFermion r_o   (FrbGrid);
+  LatticeFermion r_eo  (FGrid);
+  LatticeFermion r_eeoo(FGrid);
+
+  std::cout<<"=========================================================="<<std::endl;
+  std::cout<<"= Testing that Meo + Moe + Moo + Mee = Munprec "<<std::endl;
+  std::cout<<"=========================================================="<<std::endl;
+  // Split src by checkerboard, apply Meooe and Mooee to each half, and reassemble on the full grid.
+  pickCheckerboard(Even,src_e,src);
+  pickCheckerboard(Odd,src_o,src);
+
+  Ddwf.Meooe(src_e,r_o);  std::cout<<"Applied Meo"<<std::endl;
+  Ddwf.Meooe(src_o,r_e);  std::cout<<"Applied Moe"<<std::endl;
+  setCheckerboard(r_eo,r_o);
+  setCheckerboard(r_eo,r_e);
+
+  Ddwf.Mooee(src_e,r_e);  std::cout<<"Applied Mee"<<std::endl;
+  Ddwf.Mooee(src_o,r_o);  std::cout<<"Applied Moo"<<std::endl;
+  setCheckerboard(r_eeoo,r_e);
+  setCheckerboard(r_eeoo,r_o);
+
+  r_eo=r_eo+r_eeoo;
+  Ddwf.M(src,ref);  
+  // ref = Ddwf.M(src) is the reference the reassembled sum r_eo is compared against.
+  //  std::cout << r_eo<<std::endl;
+  //  std::cout << ref <<std::endl;
+
+  err= ref - r_eo;
+  std::cout << "EO norm diff   "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
+  // cerr is computed here but only consumed by the commented-out print below.
+  LatticeComplex cerr(FGrid);
+  cerr = localInnerProduct(err,err);
+  //  std::cout << cerr<<std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test Ddagger is the dagger of D by requiring                "<<std::endl;
+  std::cout<<"=  < phi | Deo | chi > * = < chi | Deo^dag| phi>  "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  
+  LatticeFermion chi_e   (FrbGrid);
+  LatticeFermion chi_o   (FrbGrid);
+
+  LatticeFermion dchi_e  (FrbGrid);
+  LatticeFermion dchi_o  (FrbGrid);
+
+  LatticeFermion phi_e   (FrbGrid);
+  LatticeFermion phi_o   (FrbGrid);
+
+  LatticeFermion dphi_e  (FrbGrid);
+  LatticeFermion dphi_o  (FrbGrid);
+
+  // Meooe maps one parity to the other, so each inner product pairs with the opposite-parity partner.
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+  pickCheckerboard(Even,phi_e,phi);
+  pickCheckerboard(Odd ,phi_o,phi);
+
+  Ddwf.Meooe(chi_e,dchi_o);
+  Ddwf.Meooe(chi_o,dchi_e);
+  Ddwf.MeooeDag(phi_e,dphi_o);
+  Ddwf.MeooeDag(phi_o,dphi_e);
+
+  ComplexD pDce = innerProduct(phi_e,dchi_e);
+  ComplexD pDco = innerProduct(phi_o,dchi_o);
+  ComplexD cDpe = innerProduct(chi_e,dphi_e);
+  ComplexD cDpo = innerProduct(chi_o,dphi_o);
+
+  std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
+  std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
+
+  std::cout <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl;
+  std::cout <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MeeInv Mee = 1                                         "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  // Apply Mooee then MooeeInv on each parity of chi; phi should reproduce chi.
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+
+  Ddwf.Mooee(chi_e,src_e);
+  Ddwf.MooeeInv(src_e,phi_e);
+
+  Ddwf.Mooee(chi_o,src_o);
+  Ddwf.MooeeInv(src_o,phi_o);
+  
+  setCheckerboard(phi,phi_e);
+  setCheckerboard(phi,phi_o);
+
+  err = phi-chi;
+  std::cout << "norm diff   "<< norm2(err)<< std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MeeInvDag MeeDag = 1                                   "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  // Same round trip as above, using the Dag variants of both calls.
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+
+  Ddwf.MooeeDag(chi_e,src_e);
+  Ddwf.MooeeInvDag(src_e,phi_e);
+
+  Ddwf.MooeeDag(chi_o,src_o);
+  Ddwf.MooeeInvDag(src_o,phi_o);
+  
+  setCheckerboard(phi,phi_e);
+  setCheckerboard(phi,phi_o);
+
+  err = phi-chi;
+  std::cout << "norm diff   "<< norm2(err)<< std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MpcDagMpc is Hermitian              "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  // Fresh random phi and chi; t1 and t2 only receive MpcDagMpc's extra outputs and are not read.
+  random(*RNG5,phi);
+  random(*RNG5,chi);
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+  pickCheckerboard(Even,phi_e,phi);
+  pickCheckerboard(Odd ,phi_o,phi);
+  RealD t1,t2;
+
+  Ddwf.MpcDagMpc(chi_e,dchi_e,t1,t2);
+  Ddwf.MpcDagMpc(chi_o,dchi_o,t1,t2);
+
+  Ddwf.MpcDagMpc(phi_e,dphi_e,t1,t2);
+  Ddwf.MpcDagMpc(phi_o,dphi_o,t1,t2);
+
+  pDce = innerProduct(phi_e,dchi_e);
+  pDco = innerProduct(phi_o,dchi_o);
+  cDpe = innerProduct(chi_e,dphi_e);
+  cDpo = innerProduct(chi_o,dphi_o);
+
+  std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
+  std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
+  // MpcDagMpc keeps the parity, so each difference pairs same-parity products; labels now match the values.
+  std::cout <<"pDco - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
+  std::cout <<"pDce - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
+  
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 6f92a94d..f8f5df29 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -22,13 +22,10 @@ bin_PROGRAMS = Grid_main \
 	Grid_dwf_even_odd\
 	Grid_dwf_cg_unprec\
 	Grid_dwf_cg_prec\
-	Grid_dwf_cg_schur
+	Grid_dwf_cg_schur\
+	Grid_any_evenodd
+
 
-test: 
-	for f in $bin_PROGRAMS
-	do
-	./$f > $f.log
-	done
 
 Grid_main_SOURCES = Grid_main.cc
 Grid_main_LDADD = -lGrid
@@ -66,6 +63,9 @@ Grid_simd_LDADD = -lGrid
 Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
 Grid_wilson_evenodd_LDADD = -lGrid
 
+Grid_any_evenodd_SOURCES = Grid_any_evenodd.cc
+Grid_any_evenodd_LDADD = -lGrid
+
 Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
 Grid_wilson_cg_unprec_LDADD = -lGrid
 

From 343d039b378647f494a9a15f66f14b8e0b1ab4db Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 09:51:06 +0100
Subject: [PATCH 11/20] Scaled Shamir and Scaled Shamir Zolotarev aliases for
 special cases of Mobius.

---
 lib/qcd/action/Actions.h                      |  3 +-
 lib/qcd/action/fermion/ScaledShamir.h         | 51 -------------------
 lib/qcd/action/fermion/ScaledShamirFermion.h  | 37 ++++++++++++++
 .../fermion/ScaledShamirZolotarevFermion.h    | 39 ++++++++++++++
 tests/Grid_any_evenodd.cc                     |  8 ++-
 5 files changed, 85 insertions(+), 53 deletions(-)
 delete mode 100644 lib/qcd/action/fermion/ScaledShamir.h
 create mode 100644 lib/qcd/action/fermion/ScaledShamirFermion.h
 create mode 100644 lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h

diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index d37d1cd4..893564e0 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -41,7 +41,8 @@
 #include <qcd/action/fermion/DomainWallFermion.h>
 #include <qcd/action/fermion/MobiusFermion.h>
 #include <qcd/action/fermion/MobiusZolotarevFermion.h>
-//#include <qcd/action/fermion/ScaledShamirCayleyTanh.h>
+#include <qcd/action/fermion/ScaledShamirFermion.h>
+#include <qcd/action/fermion/ScaledShamirZolotarevFermion.h>
 
 
     // Chroma interface defining FermionAction
diff --git a/lib/qcd/action/fermion/ScaledShamir.h b/lib/qcd/action/fermion/ScaledShamir.h
deleted file mode 100644
index a1fd33d0..00000000
--- a/lib/qcd/action/fermion/ScaledShamir.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef  GRID_QCD_DOMAIN_WALL_FERMION_H
-#define  GRID_QCD_DOMAIN_WALL_FERMION_H
-
-#include <Grid.h>
-
-namespace Grid {
-
-  namespace QCD {
-
-    class ScaledShamirFermion : public CayleyFermion5D
-    {
-    public:
-
-      // Constructors
-      ScaledShamirFermion(LatticeGaugeField &_Umu,
-			  GridCartesian         &FiveDimGrid,
-			  GridRedBlackCartesian &FiveDimRedBlackGrid,
-			  GridCartesian         &FourDimGrid,
-			  GridRedBlackCartesian &FourDimRedBlackGrid,
-			  RealD _mass,RealD _M5, RealD scale) : 
-      
-      CayleyFermion5D(_Umu,
-		      FiveDimGrid,
-		      FiveDimRedBlackGrid,
-		      FourDimGrid,
-		      FourDimRedBlackGrid,_mass,_M5,
-		      RealD b, 
-		      RealD c)
-
-      {
-	RealD eps = 1.0;
-
-	Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham
-	assert(zdata->n==this->Ls);
-	
-	//b+c = scale;
-	//b-c = 1
-	//b   = 0.5(scale+1);
-	//c   = 0.5(scale-1);
-	
-	// Call base setter
-	this->CayleyFermion5D::SetCoefficients(1.0,zdata,0.5*(scale+1.0),0.5*(scale-1.0));
-
-       }
-
-    };
-
-  }
-}
-
-#endif
diff --git a/lib/qcd/action/fermion/ScaledShamirFermion.h b/lib/qcd/action/fermion/ScaledShamirFermion.h
new file mode 100644
index 00000000..59fb16a8
--- /dev/null
+++ b/lib/qcd/action/fermion/ScaledShamirFermion.h
@@ -0,0 +1,37 @@
+#ifndef  GRID_QCD_SCALED_SHAMIR_FERMION_H
+#define  GRID_QCD_SCALED_SHAMIR_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class ScaledShamirFermion : public MobiusFermion
+    {
+    public:
+      // MobiusFermion driven by a single "scale" parameter from which b and c are derived.
+      // Constructor: every argument except scale is forwarded unchanged to MobiusFermion.
+    ScaledShamirFermion(LatticeGaugeField &_Umu,
+			GridCartesian         &FiveDimGrid,
+			GridRedBlackCartesian &FiveDimRedBlackGrid,
+			GridCartesian         &FourDimGrid,
+			GridRedBlackCartesian &FourDimRedBlackGrid,
+			RealD _mass,RealD _M5,
+			RealD scale) :
+      // The last two base arguments are b = 0.5*(scale+1) and c = 0.5*(scale-1), which follow from:
+      // b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
+      MobiusFermion(_Umu,
+		    FiveDimGrid,
+		    FiveDimRedBlackGrid,
+		    FourDimGrid,
+		    FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
+      {
+      }
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h b/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h
new file mode 100644
index 00000000..a4c88d5d
--- /dev/null
+++ b/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h
@@ -0,0 +1,39 @@
+#ifndef  GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H
+#define  GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class ScaledShamirZolotarevFermion : public MobiusZolotarevFermion
+    {
+    public:
+      // MobiusZolotarevFermion driven by a single "scale" parameter; adds no members of its own.
+      // Constructor: all arguments except scale are forwarded unchanged to MobiusZolotarevFermion.
+      // scale is converted to b = 0.5*(scale+1) and c = 0.5*(scale-1), i.e. b+c = scale and b-c = 1.
+      // lo and hi are passed through after b and c.
+    ScaledShamirZolotarevFermion(LatticeGaugeField &_Umu,
+				 GridCartesian         &FiveDimGrid,
+				 GridRedBlackCartesian &FiveDimRedBlackGrid,
+				 GridCartesian         &FourDimGrid,
+				 GridRedBlackCartesian &FourDimRedBlackGrid,
+				 RealD _mass,RealD _M5,
+				 RealD scale,
+				 RealD lo, RealD hi) : 
+      
+      MobiusZolotarevFermion(_Umu,
+			       FiveDimGrid,
+			       FiveDimRedBlackGrid,
+			       FourDimGrid,
+			       FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),lo,hi)
+
+      {}
+
+    };
+
+  }
+}
+
+#endif
diff --git a/tests/Grid_any_evenodd.cc b/tests/Grid_any_evenodd.cc
index 8d8580c1..de1e3c78 100644
--- a/tests/Grid_any_evenodd.cc
+++ b/tests/Grid_any_evenodd.cc
@@ -56,9 +56,15 @@ int main (int argc, char ** argv)
   MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
   TestWhat<MobiusFermion>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
-  MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,4.0);
+  MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
   TestWhat<MobiusZolotarevFermion>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
+  ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
+  TestWhat<ScaledShamirFermion>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  ScaledShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0,0.1,2.0);
+  TestWhat<ScaledShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
 
   Grid_finalize();
 }

From 26e9b04fab8c9e6db1b902ef0a9564785dea172b Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 10:54:03 +0100
Subject: [PATCH 12/20] CG test written and passes i.e. converges with small
 true residual in RedBlack MpcDagMpc, Unprec MdagM and Schur red black solver
 for each of.

DomainWallFermion
MobiusFermion
MobiusZolotarevFermion
ScaledShamirFermion
ScaledShamirZolotarevFermion
---
 .../{Grid_comms.cc => Benchmark_comms.cc}     |   0
 benchmarks/{Grid_dwf.cc => Benchmark_dwf.cc}  |   0
 ...width.cc => Benchmark_memory_bandwidth.cc} |   0
 benchmarks/{Grid_su3.cc => Benchmark_su3.cc}  |   0
 ...Grid_su3_expr.cc => Benchmark_su3_expr.cc} |   0
 ...Grid_su3_test.cc => Benchmark_su3_test.cc} |   0
 .../{Grid_wilson.cc => Benchmark_wilson.cc}   |   0
 benchmarks/Makefile.am                        |  30 ++--
 tests/Makefile.am                             | 121 ++++++-------
 tests/{Grid_cshift.cc => Test_cshift.cc}      |   0
 ..._red_black.cc => Test_cshift_red_black.cc} |   0
 ...rid_dwf_cg_prec.cc => Test_dwf_cg_prec.cc} |   0
 ...d_dwf_cg_schur.cc => Test_dwf_cg_schur.cc} |   0
 ...dwf_cg_unprec.cc => Test_dwf_cg_unprec.cc} |   0
 ...d_dwf_even_odd.cc => Test_dwf_even_odd.cc} |   0
 tests/{Grid_gamma.cc => Test_gamma.cc}        |   0
 tests/{Grid_main.cc => Test_main.cc}          |   0
 tests/Test_many_cg.cc                         | 164 ++++++++++++++++++
 ...id_any_evenodd.cc => Test_many_evenodd.cc} |   0
 tests/{Grid_nersc_io.cc => Test_nersc_io.cc}  |   0
 tests/{Grid_remez.cc => Test_remez.cc}        |   0
 tests/{Grid_rng.cc => Test_rng.cc}            |   0
 .../{Grid_rng_fixed.cc => Test_rng_fixed.cc}  |   0
 tests/{Grid_simd.cc => Test_simd.cc}          |   0
 tests/{Grid_simd_new.cc => Test_simd_new.cc}  |   0
 tests/{Grid_stencil.cc => Test_stencil.cc}    |   0
 ...lson_cg_prec.cc => Test_wilson_cg_prec.cc} |   0
 ...on_cg_schur.cc => Test_wilson_cg_schur.cc} |   0
 ..._cg_unprec.cc => Test_wilson_cg_unprec.cc} |   0
 ...lson_evenodd.cc => Test_wilson_evenodd.cc} |   0
 30 files changed, 241 insertions(+), 74 deletions(-)
 rename benchmarks/{Grid_comms.cc => Benchmark_comms.cc} (100%)
 rename benchmarks/{Grid_dwf.cc => Benchmark_dwf.cc} (100%)
 rename benchmarks/{Grid_memory_bandwidth.cc => Benchmark_memory_bandwidth.cc} (100%)
 rename benchmarks/{Grid_su3.cc => Benchmark_su3.cc} (100%)
 rename benchmarks/{Grid_su3_expr.cc => Benchmark_su3_expr.cc} (100%)
 rename benchmarks/{Grid_su3_test.cc => Benchmark_su3_test.cc} (100%)
 rename benchmarks/{Grid_wilson.cc => Benchmark_wilson.cc} (100%)
 rename tests/{Grid_cshift.cc => Test_cshift.cc} (100%)
 rename tests/{Grid_cshift_red_black.cc => Test_cshift_red_black.cc} (100%)
 rename tests/{Grid_dwf_cg_prec.cc => Test_dwf_cg_prec.cc} (100%)
 rename tests/{Grid_dwf_cg_schur.cc => Test_dwf_cg_schur.cc} (100%)
 rename tests/{Grid_dwf_cg_unprec.cc => Test_dwf_cg_unprec.cc} (100%)
 rename tests/{Grid_dwf_even_odd.cc => Test_dwf_even_odd.cc} (100%)
 rename tests/{Grid_gamma.cc => Test_gamma.cc} (100%)
 rename tests/{Grid_main.cc => Test_main.cc} (100%)
 create mode 100644 tests/Test_many_cg.cc
 rename tests/{Grid_any_evenodd.cc => Test_many_evenodd.cc} (100%)
 rename tests/{Grid_nersc_io.cc => Test_nersc_io.cc} (100%)
 rename tests/{Grid_remez.cc => Test_remez.cc} (100%)
 rename tests/{Grid_rng.cc => Test_rng.cc} (100%)
 rename tests/{Grid_rng_fixed.cc => Test_rng_fixed.cc} (100%)
 rename tests/{Grid_simd.cc => Test_simd.cc} (100%)
 rename tests/{Grid_simd_new.cc => Test_simd_new.cc} (100%)
 rename tests/{Grid_stencil.cc => Test_stencil.cc} (100%)
 rename tests/{Grid_wilson_cg_prec.cc => Test_wilson_cg_prec.cc} (100%)
 rename tests/{Grid_wilson_cg_schur.cc => Test_wilson_cg_schur.cc} (100%)
 rename tests/{Grid_wilson_cg_unprec.cc => Test_wilson_cg_unprec.cc} (100%)
 rename tests/{Grid_wilson_evenodd.cc => Test_wilson_evenodd.cc} (100%)

diff --git a/benchmarks/Grid_comms.cc b/benchmarks/Benchmark_comms.cc
similarity index 100%
rename from benchmarks/Grid_comms.cc
rename to benchmarks/Benchmark_comms.cc
diff --git a/benchmarks/Grid_dwf.cc b/benchmarks/Benchmark_dwf.cc
similarity index 100%
rename from benchmarks/Grid_dwf.cc
rename to benchmarks/Benchmark_dwf.cc
diff --git a/benchmarks/Grid_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc
similarity index 100%
rename from benchmarks/Grid_memory_bandwidth.cc
rename to benchmarks/Benchmark_memory_bandwidth.cc
diff --git a/benchmarks/Grid_su3.cc b/benchmarks/Benchmark_su3.cc
similarity index 100%
rename from benchmarks/Grid_su3.cc
rename to benchmarks/Benchmark_su3.cc
diff --git a/benchmarks/Grid_su3_expr.cc b/benchmarks/Benchmark_su3_expr.cc
similarity index 100%
rename from benchmarks/Grid_su3_expr.cc
rename to benchmarks/Benchmark_su3_expr.cc
diff --git a/benchmarks/Grid_su3_test.cc b/benchmarks/Benchmark_su3_test.cc
similarity index 100%
rename from benchmarks/Grid_su3_test.cc
rename to benchmarks/Benchmark_su3_test.cc
diff --git a/benchmarks/Grid_wilson.cc b/benchmarks/Benchmark_wilson.cc
similarity index 100%
rename from benchmarks/Grid_wilson.cc
rename to benchmarks/Benchmark_wilson.cc
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index e1e092f4..74ff03c6 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -6,24 +6,24 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 # Test code
 #
 bin_PROGRAMS = \
-	Grid_comms \
-	Grid_memory_bandwidth \
-	Grid_su3 \
-	Grid_wilson \
-	Grid_dwf
+	Benchmark_comms \
+	Benchmark_memory_bandwidth \
+	Benchmark_su3 \
+	Benchmark_wilson \
+	Benchmark_dwf
 
-Grid_comms_SOURCES = Grid_comms.cc
-Grid_comms_LDADD = -lGrid
+Benchmark_comms_SOURCES = Benchmark_comms.cc
+Benchmark_comms_LDADD = -lGrid
 
-Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc
-Grid_memory_bandwidth_LDADD = -lGrid
+Benchmark_memory_bandwidth_SOURCES = Benchmark_memory_bandwidth.cc
+Benchmark_memory_bandwidth_LDADD = -lGrid
 
-Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc
-Grid_su3_LDADD = -lGrid
+Benchmark_su3_SOURCES = Benchmark_su3.cc Benchmark_su3_test.cc Benchmark_su3_expr.cc
+Benchmark_su3_LDADD = -lGrid
 
-Grid_wilson_SOURCES = Grid_wilson.cc
-Grid_wilson_LDADD = -lGrid
+Benchmark_wilson_SOURCES = Benchmark_wilson.cc
+Benchmark_wilson_LDADD = -lGrid
 
-Grid_dwf_SOURCES = Grid_dwf.cc
-Grid_dwf_LDADD = -lGrid
+Benchmark_dwf_SOURCES = Benchmark_dwf.cc
+Benchmark_dwf_LDADD = -lGrid
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f8f5df29..bb3448f1 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,85 +5,88 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 #
 # Test code
 #
-bin_PROGRAMS = Grid_main \
-	Grid_simd \
-	Grid_gamma  \
-	Grid_cshift \
-	Grid_cshift_red_black \
-	Grid_stencil \
-	Grid_nersc_io \
-	Grid_rng \
-	Grid_remez \
-	Grid_rng_fixed \
-	Grid_wilson_evenodd \
-	Grid_wilson_cg_unprec \
-	Grid_wilson_cg_prec \
-	Grid_wilson_cg_schur \
-	Grid_dwf_even_odd\
-	Grid_dwf_cg_unprec\
-	Grid_dwf_cg_prec\
-	Grid_dwf_cg_schur\
-	Grid_any_evenodd
+bin_PROGRAMS = Test_main \
+	Test_simd \
+	Test_gamma  \
+	Test_cshift \
+	Test_cshift_red_black \
+	Test_stencil \
+	Test_nersc_io \
+	Test_rng \
+	Test_remez \
+	Test_rng_fixed \
+	Test_wilson_evenodd \
+	Test_wilson_cg_unprec \
+	Test_wilson_cg_prec \
+	Test_wilson_cg_schur \
+	Test_dwf_even_odd\
+	Test_dwf_cg_unprec\
+	Test_dwf_cg_prec\
+	Test_dwf_cg_schur\
+	Test_many_evenodd\
+	Test_many_cg
 
 
 
-Grid_main_SOURCES = Grid_main.cc
-Grid_main_LDADD = -lGrid
+Test_main_SOURCES = Test_main.cc
+Test_main_LDADD = -lGrid
 
-Grid_rng_SOURCES = Grid_rng.cc
-Grid_rng_LDADD = -lGrid
+Test_rng_SOURCES = Test_rng.cc
+Test_rng_LDADD = -lGrid
 
-Grid_rng_fixed_SOURCES = Grid_rng_fixed.cc
-Grid_rng_fixed_LDADD = -lGrid
+Test_rng_fixed_SOURCES = Test_rng_fixed.cc
+Test_rng_fixed_LDADD = -lGrid
 
-Grid_remez_SOURCES = Grid_remez.cc
-Grid_remez_LDADD = -lGrid
+Test_remez_SOURCES = Test_remez.cc
+Test_remez_LDADD = -lGrid
 
-Grid_nersc_io_SOURCES = Grid_nersc_io.cc
-Grid_nersc_io_LDADD = -lGrid
+Test_nersc_io_SOURCES = Test_nersc_io.cc
+Test_nersc_io_LDADD = -lGrid
 
-Grid_cshift_SOURCES = Grid_cshift.cc
-Grid_cshift_LDADD = -lGrid
+Test_cshift_SOURCES = Test_cshift.cc
+Test_cshift_LDADD = -lGrid
 
-Grid_cshift_red_black_SOURCES = Grid_cshift_red_black.cc
-Grid_cshift_red_black_LDADD = -lGrid
+Test_cshift_red_black_SOURCES = Test_cshift_red_black.cc
+Test_cshift_red_black_LDADD = -lGrid
 
-Grid_gamma_SOURCES = Grid_gamma.cc
-Grid_gamma_LDADD = -lGrid
+Test_gamma_SOURCES = Test_gamma.cc
+Test_gamma_LDADD = -lGrid
 
-Grid_stencil_SOURCES = Grid_stencil.cc
-Grid_stencil_LDADD = -lGrid
+Test_stencil_SOURCES = Test_stencil.cc
+Test_stencil_LDADD = -lGrid
 
-Grid_simd_SOURCES = Grid_simd.cc
-Grid_simd_LDADD = -lGrid
+Test_simd_SOURCES = Test_simd.cc
+Test_simd_LDADD = -lGrid
 
-#Grid_simd_new_SOURCES = Grid_simd_new.cc
-#Grid_simd_new_LDADD = -lGrid
+#Test_simd_new_SOURCES = Test_simd_new.cc
+#Test_simd_new_LDADD = -lGrid
 
-Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc
-Grid_wilson_evenodd_LDADD = -lGrid
+Test_wilson_evenodd_SOURCES = Test_wilson_evenodd.cc
+Test_wilson_evenodd_LDADD = -lGrid
 
-Grid_any_evenodd_SOURCES = Grid_any_evenodd.cc
-Grid_any_evenodd_LDADD = -lGrid
+Test_wilson_cg_unprec_SOURCES = Test_wilson_cg_unprec.cc
+Test_wilson_cg_unprec_LDADD = -lGrid
 
-Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc
-Grid_wilson_cg_unprec_LDADD = -lGrid
+Test_wilson_cg_prec_SOURCES = Test_wilson_cg_prec.cc
+Test_wilson_cg_prec_LDADD = -lGrid
 
-Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc
-Grid_wilson_cg_prec_LDADD = -lGrid
+Test_wilson_cg_schur_SOURCES = Test_wilson_cg_schur.cc
+Test_wilson_cg_schur_LDADD = -lGrid
 
-Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc
-Grid_wilson_cg_schur_LDADD = -lGrid
+Test_dwf_even_odd_SOURCES = Test_dwf_even_odd.cc
+Test_dwf_even_odd_LDADD = -lGrid
 
-Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc
-Grid_dwf_even_odd_LDADD = -lGrid
+Test_dwf_cg_unprec_SOURCES = Test_dwf_cg_unprec.cc
+Test_dwf_cg_unprec_LDADD = -lGrid
 
-Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc
-Grid_dwf_cg_unprec_LDADD = -lGrid
+Test_dwf_cg_prec_SOURCES = Test_dwf_cg_prec.cc
+Test_dwf_cg_prec_LDADD = -lGrid
 
-Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc
-Grid_dwf_cg_prec_LDADD = -lGrid
+Test_dwf_cg_schur_SOURCES = Test_dwf_cg_schur.cc
+Test_dwf_cg_schur_LDADD = -lGrid
 
-Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc
-Grid_dwf_cg_schur_LDADD = -lGrid
+Test_many_evenodd_SOURCES = Test_many_evenodd.cc
+Test_many_evenodd_LDADD = -lGrid
 
+Test_many_cg_SOURCES = Test_many_cg.cc
+Test_many_cg_LDADD = -lGrid
diff --git a/tests/Grid_cshift.cc b/tests/Test_cshift.cc
similarity index 100%
rename from tests/Grid_cshift.cc
rename to tests/Test_cshift.cc
diff --git a/tests/Grid_cshift_red_black.cc b/tests/Test_cshift_red_black.cc
similarity index 100%
rename from tests/Grid_cshift_red_black.cc
rename to tests/Test_cshift_red_black.cc
diff --git a/tests/Grid_dwf_cg_prec.cc b/tests/Test_dwf_cg_prec.cc
similarity index 100%
rename from tests/Grid_dwf_cg_prec.cc
rename to tests/Test_dwf_cg_prec.cc
diff --git a/tests/Grid_dwf_cg_schur.cc b/tests/Test_dwf_cg_schur.cc
similarity index 100%
rename from tests/Grid_dwf_cg_schur.cc
rename to tests/Test_dwf_cg_schur.cc
diff --git a/tests/Grid_dwf_cg_unprec.cc b/tests/Test_dwf_cg_unprec.cc
similarity index 100%
rename from tests/Grid_dwf_cg_unprec.cc
rename to tests/Test_dwf_cg_unprec.cc
diff --git a/tests/Grid_dwf_even_odd.cc b/tests/Test_dwf_even_odd.cc
similarity index 100%
rename from tests/Grid_dwf_even_odd.cc
rename to tests/Test_dwf_even_odd.cc
diff --git a/tests/Grid_gamma.cc b/tests/Test_gamma.cc
similarity index 100%
rename from tests/Grid_gamma.cc
rename to tests/Test_gamma.cc
diff --git a/tests/Grid_main.cc b/tests/Test_main.cc
similarity index 100%
rename from tests/Grid_main.cc
rename to tests/Test_main.cc
diff --git a/tests/Test_many_cg.cc b/tests/Test_many_cg.cc
new file mode 100644
index 00000000..3241a025
--- /dev/null
+++ b/tests/Test_many_cg.cc
@@ -0,0 +1,164 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal {
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+template<class What> 
+void  TestCGinversions(What & Ddwf, 
+		       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		       RealD mass, RealD M5,
+		       GridParallelRNG *RNG4,
+		       GridParallelRNG *RNG5);
+template<class What> 
+void  TestCGschur(What & Ddwf, 
+		  GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		  GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		  RealD mass, RealD M5,
+		  GridParallelRNG *RNG4,
+		  GridParallelRNG *RNG5);
+
+template<class What> 
+void  TestCGunprec(What & Ddwf, 
+		   GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		   GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		   RealD mass, RealD M5,
+		   GridParallelRNG *RNG4,
+		   GridParallelRNG *RNG5);
+
+template<class What> 
+void  TestCGprec(What & Ddwf, 
+		 GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		 GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		 RealD mass, RealD M5,
+		 GridParallelRNG *RNG4,
+		 GridParallelRNG *RNG5);
+
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  const int Ls=8;
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+
+  std::vector<int> seeds4({1,2,3,4});
+  std::vector<int> seeds5({5,6,7,8});
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+  std::vector<LatticeColourMatrix> U(4,UGrid);
+
+  RealD mass=0.1;
+  RealD M5  =1.8;
+  std::cout <<"DomainWallFermion test"<<std::endl;
+  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  TestCGinversions<DomainWallFermion>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  RealD b=1.5;// Scale factor b+c=2, b-c=1
+  RealD c=0.5;
+  std::cout <<"MobiusFermion test"<<std::endl;
+  MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
+  TestCGinversions<MobiusFermion>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"MobiusZolotarevFermion test"<<std::endl;
+  MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
+  TestCGinversions<MobiusZolotarevFermion>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"ScaledShamirFermion test"<<std::endl;
+  ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
+  TestCGinversions<ScaledShamirFermion>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"ScaledShamirZolotarevFermion test"<<std::endl;
+  ScaledShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0,0.1,2.0);
+  TestCGinversions<ScaledShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  Grid_finalize();
+}
+template<class What> 
+void  TestCGinversions(What & Ddwf, 
+		       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		       RealD mass, RealD M5,
+		       GridParallelRNG *RNG4,
+		       GridParallelRNG *RNG5)
+{
+  std::cout << "Testing unpreconditioned inverter"<<std::endl;
+  TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
+  std::cout << "Testing red black preconditioned inverter"<<std::endl;
+  TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
+  std::cout << "Testing red black Schur inverter"<<std::endl;
+  TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
+}
+
+template<class What> 
+void  TestCGunprec(What & Ddwf, 
+		   GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		   GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		   RealD mass, RealD M5,
+		   GridParallelRNG *RNG4,
+		   GridParallelRNG *RNG5)
+{
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+
+  HermitianOperator<What,LatticeFermion> HermOp(Ddwf);
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  CG(HermOp,src,result);
+
+}
+template<class What> 
+void  TestCGprec(What & Ddwf, 
+		 GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		 GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		 RealD mass, RealD M5,
+		 GridParallelRNG *RNG4,
+		 GridParallelRNG *RNG5)
+{
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion    src_o(FrbGrid);
+  LatticeFermion result_o(FrbGrid);
+  pickCheckerboard(Odd,src_o,src);
+  result_o=zero;
+
+  HermitianCheckerBoardedOperator<What,LatticeFermion> HermOpEO(Ddwf);
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  CG(HermOpEO,src_o,result_o);
+}
+
+
+template<class What> 
+void  TestCGschur(What & Ddwf, 
+		   GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		   GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		   RealD mass, RealD M5,
+		   GridParallelRNG *RNG4,
+		   GridParallelRNG *RNG5)
+{
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  SchurRedBlackSolve<LatticeFermion> SchurSolver(CG);
+  SchurSolver(Ddwf,src,result);
+}
diff --git a/tests/Grid_any_evenodd.cc b/tests/Test_many_evenodd.cc
similarity index 100%
rename from tests/Grid_any_evenodd.cc
rename to tests/Test_many_evenodd.cc
diff --git a/tests/Grid_nersc_io.cc b/tests/Test_nersc_io.cc
similarity index 100%
rename from tests/Grid_nersc_io.cc
rename to tests/Test_nersc_io.cc
diff --git a/tests/Grid_remez.cc b/tests/Test_remez.cc
similarity index 100%
rename from tests/Grid_remez.cc
rename to tests/Test_remez.cc
diff --git a/tests/Grid_rng.cc b/tests/Test_rng.cc
similarity index 100%
rename from tests/Grid_rng.cc
rename to tests/Test_rng.cc
diff --git a/tests/Grid_rng_fixed.cc b/tests/Test_rng_fixed.cc
similarity index 100%
rename from tests/Grid_rng_fixed.cc
rename to tests/Test_rng_fixed.cc
diff --git a/tests/Grid_simd.cc b/tests/Test_simd.cc
similarity index 100%
rename from tests/Grid_simd.cc
rename to tests/Test_simd.cc
diff --git a/tests/Grid_simd_new.cc b/tests/Test_simd_new.cc
similarity index 100%
rename from tests/Grid_simd_new.cc
rename to tests/Test_simd_new.cc
diff --git a/tests/Grid_stencil.cc b/tests/Test_stencil.cc
similarity index 100%
rename from tests/Grid_stencil.cc
rename to tests/Test_stencil.cc
diff --git a/tests/Grid_wilson_cg_prec.cc b/tests/Test_wilson_cg_prec.cc
similarity index 100%
rename from tests/Grid_wilson_cg_prec.cc
rename to tests/Test_wilson_cg_prec.cc
diff --git a/tests/Grid_wilson_cg_schur.cc b/tests/Test_wilson_cg_schur.cc
similarity index 100%
rename from tests/Grid_wilson_cg_schur.cc
rename to tests/Test_wilson_cg_schur.cc
diff --git a/tests/Grid_wilson_cg_unprec.cc b/tests/Test_wilson_cg_unprec.cc
similarity index 100%
rename from tests/Grid_wilson_cg_unprec.cc
rename to tests/Test_wilson_cg_unprec.cc
diff --git a/tests/Grid_wilson_evenodd.cc b/tests/Test_wilson_evenodd.cc
similarity index 100%
rename from tests/Grid_wilson_evenodd.cc
rename to tests/Test_wilson_evenodd.cc

From 8fe3d4f97126a300f76da35f3cbe9d943f6541c7 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 11:26:54 +0100
Subject: [PATCH 13/20] Add Overlap Wilson Cayley tanh & Zolotarev fermions

---
 lib/qcd/action/Actions.h                      | 27 ++++++++++---
 .../action/fermion/MobiusZolotarevFermion.h   |  1 +
 .../fermion/OverlapWilsonCayleyTanhFermion.h  | 34 ++++++++++++++++
 .../OverlapWilsonCayleyZolotarevFermion.h     | 37 ++++++++++++++++++
 .../action/fermion/PartialFractionFermion5D.h |  4 +-
 .../fermion/ScaledShamirZolotarevFermion.h    | 39 -------------------
 .../action/fermion/ShamirZolotarevFermion.h   | 39 +++++++++++++++++++
 tests/Test_many_cg.cc                         | 14 +++++--
 tests/Test_many_evenodd.cc                    | 17 +++++++-
 9 files changed, 161 insertions(+), 51 deletions(-)
 create mode 100644 lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h
 create mode 100644 lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h
 delete mode 100644 lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h
 create mode 100644 lib/qcd/action/fermion/ShamirZolotarevFermion.h

diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index 893564e0..8a8c4642 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -30,19 +30,36 @@
 //#include <qcd/action/fermion/CloverFermion.h>
 
 ////////////////////////////////////////////
-// 5D formulations
+// 5D formulations...
 ////////////////////////////////////////////
+
 #include <qcd/action/fermion/WilsonFermion5D.h> // used by all 5d overlap types
+
+//////////
+// Cayley
+//////////
 #include <qcd/action/fermion/CayleyFermion5D.h>
-#include <qcd/action/fermion/ContinuedFractionFermion5D.h>
-//#include <qcd/action/fermion/PartialFraction.h>
 
 #include <qcd/action/fermion/DomainWallFermion.h>
 #include <qcd/action/fermion/DomainWallFermion.h>
+
 #include <qcd/action/fermion/MobiusFermion.h>
-#include <qcd/action/fermion/MobiusZolotarevFermion.h>
 #include <qcd/action/fermion/ScaledShamirFermion.h>
-#include <qcd/action/fermion/ScaledShamirZolotarevFermion.h>
+#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
+
+#include <qcd/action/fermion/MobiusZolotarevFermion.h>
+#include <qcd/action/fermion/ShamirZolotarevFermion.h>
+#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
+
+//////////////////////
+// Continued fraction
+//////////////////////
+#include <qcd/action/fermion/ContinuedFractionFermion5D.h>
+
+//////////////////////
+// Partial fraction
+//////////////////////
+#include <qcd/action/fermion/PartialFractionFermion5D.h>
 
 
     // Chroma interface defining FermionAction
diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
index 866d0c39..9ac795d9 100644
--- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h
+++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
@@ -34,6 +34,7 @@ namespace Grid {
 	assert(zdata->n==this->Ls);
 
 	std::cout << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
+	std::cout << "MobiusZolotarevFermion : note there is a degeneracy between (b+c) and Zolo param hi"<<std::endl;
 	
 	// Call base setter
 	this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c);
diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h
new file mode 100644
index 00000000..e764c8ae
--- /dev/null
+++ b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h
@@ -0,0 +1,34 @@
+#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class OverlapWilsonCayleyTanhFermion : public MobiusFermion
+    {
+    public:
+
+      // Constructors
+    OverlapWilsonCayleyTanhFermion(LatticeGaugeField &_Umu,
+				   GridCartesian         &FiveDimGrid,
+				   GridRedBlackCartesian &FiveDimRedBlackGrid,
+				   GridCartesian         &FourDimGrid,
+				   GridRedBlackCartesian &FourDimRedBlackGrid,
+				   RealD _mass,RealD _M5,
+				   RealD scale) :
+      
+      // b+c=scale, b-c = 0 <=> b =c = scale/2
+      MobiusFermion(_Umu,
+		    FiveDimGrid,
+		    FiveDimRedBlackGrid,
+		    FourDimGrid,
+		    FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale)
+	{
+	}
+    };
+  }
+}
+#endif
diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h
new file mode 100644
index 00000000..82c43fb7
--- /dev/null
+++ b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h
@@ -0,0 +1,37 @@
+#ifndef  OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
+#define  OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion
+    {
+    public:
+
+      // Constructors
+
+    OverlapWilsonCayleyZolotarevFermion(LatticeGaugeField &_Umu,
+					GridCartesian         &FiveDimGrid,
+					GridRedBlackCartesian &FiveDimRedBlackGrid,
+					GridCartesian         &FourDimGrid,
+					GridRedBlackCartesian &FourDimRedBlackGrid,
+					RealD _mass,RealD _M5,
+					RealD lo, RealD hi) : 
+      // b+c=1.0, b-c = 0 <=> b =c = 1/2
+      MobiusZolotarevFermion(_Umu,
+			     FiveDimGrid,
+			     FiveDimRedBlackGrid,
+			     FourDimGrid,
+			     FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi)
+
+      {}
+
+    };
+
+  }
+}
+
+#endif
diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h b/lib/qcd/action/fermion/PartialFractionFermion5D.h
index c51aa563..95f8c0f9 100644
--- a/lib/qcd/action/fermion/PartialFractionFermion5D.h
+++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h
@@ -25,10 +25,10 @@ namespace Grid {
 
       virtual void PartialFractionCoefficients(void);
 
-      zolotarev_data *zdata;
+      Approx::zolotarev_data *zdata;
 
       // Part frac
-      double R=(1+this->mass)/(1-this->mass);
+      double R;
       std::vector<double> p; 
       std::vector<double> q;
 
diff --git a/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h b/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h
deleted file mode 100644
index a4c88d5d..00000000
--- a/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef  GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H
-#define  GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H
-
-#include <Grid.h>
-
-namespace Grid {
-
-  namespace QCD {
-
-    class ScaledShamirZolotarevFermion : public MobiusZolotarevFermion
-    {
-    public:
-
-      // Constructors
-
-
-    ScaledShamirZolotarevFermion(LatticeGaugeField &_Umu,
-				 GridCartesian         &FiveDimGrid,
-				 GridRedBlackCartesian &FiveDimRedBlackGrid,
-				 GridCartesian         &FourDimGrid,
-				 GridRedBlackCartesian &FourDimRedBlackGrid,
-				 RealD _mass,RealD _M5,
-				 RealD scale,
-				 RealD lo, RealD hi) : 
-      
-      MobiusZolotarevFermion(_Umu,
-			       FiveDimGrid,
-			       FiveDimRedBlackGrid,
-			       FourDimGrid,
-			       FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),lo,hi)
-
-      {}
-
-    };
-
-  }
-}
-
-#endif
diff --git a/lib/qcd/action/fermion/ShamirZolotarevFermion.h b/lib/qcd/action/fermion/ShamirZolotarevFermion.h
new file mode 100644
index 00000000..6a7df439
--- /dev/null
+++ b/lib/qcd/action/fermion/ShamirZolotarevFermion.h
@@ -0,0 +1,39 @@
+#ifndef  GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
+#define  GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class ShamirZolotarevFermion : public MobiusZolotarevFermion
+    {
+    public:
+
+      // Constructors
+
+
+    ShamirZolotarevFermion(LatticeGaugeField &_Umu,
+			   GridCartesian         &FiveDimGrid,
+			   GridRedBlackCartesian &FiveDimRedBlackGrid,
+			   GridCartesian         &FourDimGrid,
+			   GridRedBlackCartesian &FourDimRedBlackGrid,
+			   RealD _mass,RealD _M5,
+			   RealD lo, RealD hi) : 
+      
+      // b+c = 1; b-c = 1 => b=1, c=0
+      MobiusZolotarevFermion(_Umu,
+			     FiveDimGrid,
+			     FiveDimRedBlackGrid,
+			     FourDimGrid,
+			     FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi)
+      
+      {}
+
+    };
+
+  }
+}
+
+#endif
diff --git a/tests/Test_many_cg.cc b/tests/Test_many_cg.cc
index 3241a025..4510d4b5 100644
--- a/tests/Test_many_cg.cc
+++ b/tests/Test_many_cg.cc
@@ -89,9 +89,17 @@ int main (int argc, char ** argv)
   ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
   TestCGinversions<ScaledShamirFermion>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
-  std::cout <<"ScaledShamirZolotarevFermion test"<<std::endl;
-  ScaledShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0,0.1,2.0);
-  TestCGinversions<ScaledShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  std::cout <<"ShamirZolotarevFermion test"<<std::endl;
+  ShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestCGinversions<ShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
+  OverlapWilsonCayleyTanhFermion Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestCGinversions<OverlapWilsonCayleyTanhFermion>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
+  OverlapWilsonCayleyZolotarevFermion Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestCGinversions<OverlapWilsonCayleyZolotarevFermion>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }
diff --git a/tests/Test_many_evenodd.cc b/tests/Test_many_evenodd.cc
index de1e3c78..df28981b 100644
--- a/tests/Test_many_evenodd.cc
+++ b/tests/Test_many_evenodd.cc
@@ -48,23 +48,36 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
   RealD M5  =1.8;
+  std::cout <<"DomainWallFermion test"<<std::endl;
   DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
   TestWhat<DomainWallFermion>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   RealD b=1.5;// Scale factor b+c=2, b-c=1
   RealD c=0.5;
+  std::cout <<"MobiusFermion test"<<std::endl;
   MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
   TestWhat<MobiusFermion>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
+  std::cout <<"MobiusZolotarevFermion test"<<std::endl;
   MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
   TestWhat<MobiusZolotarevFermion>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
+  std::cout <<"ScaledShamirFermion test"<<std::endl;
   ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
   TestWhat<ScaledShamirFermion>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
-  ScaledShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0,0.1,2.0);
-  TestWhat<ScaledShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
+  std::cout <<"ShamirZolotarevFermion test"<<std::endl;
+  ShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestWhat<ShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
+  OverlapWilsonCayleyTanhFermion Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestWhat<OverlapWilsonCayleyTanhFermion>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
+  OverlapWilsonCayleyZolotarevFermion Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestWhat<OverlapWilsonCayleyZolotarevFermion>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }

From 4bcc319e11c2ac4babbbed22fb65a51f1c3787b1 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 12:47:05 +0100
Subject: [PATCH 14/20] Reorganisation of file naming

---
 Makefile.am                                   |   2 +
 configure                                     |   4 +-
 configure.ac                                  |   2 +-
 lib/{Grid_algorithms.h => Algorithms.h}       |   0
 ...aligned_allocator.h => AlignedAllocator.h} |   0
 lib/Cartesian.h                               |   8 ++
 lib/{Grid_communicator.h => Communicator.h}   |   2 +-
 lib/{Grid_comparison.h => Comparison.h}       |   4 +-
 lib/{Grid_cshift.h => Cshift.h}               |   6 +-
 lib/Grid.h                                    |  26 ++--
 lib/{Grid_config.h => GridConfig.h}           |   0
 lib/{Grid_config.h.in => GridConfig.h.in}     |   2 +-
 lib/{Grid_init.cc => GridInit.cc}             |   0
 lib/Grid_cartesian.h                          |   8 --
 lib/Grid_math.h                               |  16 ---
 lib/{Grid_lattice.h => Lattice.h}             |   2 +-
 lib/Makefile.am                               | 113 ++----------------
 lib/{Grid_simd.h => Simd.h}                   |   0
 lib/{Grid_stencil.h => Stencil.h}             |   2 +-
 lib/Tensors.h                                 |  16 +++
 lib/{Grid_threads.h => Threads.h}             |   0
 ...Grid_cartesian_base.h => Cartesian_base.h} |   1 -
 ...Grid_cartesian_full.h => Cartesian_full.h} |   0
 ...sian_red_black.h => Cartesian_red_black.h} |   0
 ...ommunicator_base.h => Communicator_base.h} |   0
 ...ommunicator_mpi.cc => Communicator_mpi.cc} |   0
 ...municator_none.cc => Communicator_none.cc} |   0
 .../{Grid_cshift_common.h => Cshift_common.h} |   0
 .../{Grid_cshift_mpi.h => Cshift_mpi.h}       |   0
 .../{Grid_cshift_none.h => Cshift_none.h}     |   0
 .../{Grid_lattice_ET.h => Lattice_ET.h}       |   0
 .../{Grid_lattice_arith.h => Lattice_arith.h} |   0
 .../{Grid_lattice_base.h => Lattice_base.h}   |  27 ++---
 ...tice_comparison.h => Lattice_comparison.h} |   0
 ...ce_conformable.h => Lattice_conformable.h} |   0
 ...tice_coordinate.h => Lattice_coordinate.h} |   0
 .../{Grid_lattice_local.h => Lattice_local.h} |   0
 ..._lattice_overload.h => Lattice_overload.h} |   0
 ..._lattice_peekpoke.h => Lattice_peekpoke.h} |   0
 ...id_lattice_reality.h => Lattice_reality.h} |   0
 ...attice_reduction.h => Lattice_reduction.h} |   0
 .../{Grid_lattice_rng.h => Lattice_rng.h}     |   0
 .../{Grid_lattice_trace.h => Lattice_trace.h} |   0
 ..._lattice_transfer.h => Lattice_transfer.h} |   0
 ...attice_transpose.h => Lattice_transpose.h} |   0
 .../{Grid_lattice_where.h => Lattice_where.h} |   0
 lib/math/Grid_math_arith.h                    |  11 --
 lib/parallelIO/{GridNerscIO.h => NerscIO.h}   |   0
 .../fermion/PartialFractionFermion5D.cc       |  46 -------
 lib/stencil/{Grid_lebesgue.cc => Lebesgue.cc} |   0
 lib/stencil/{Grid_lebesgue.h => Lebesgue.h}   |   0
 ...id_stencil_common.cc => Stencil_common.cc} |   0
 lib/tensors/Tensor_arith.h                    |  11 ++
 .../Tensor_arith_add.h}                       |   0
 .../Tensor_arith_mac.h}                       |   0
 .../Tensor_arith_mul.h}                       |   0
 .../Tensor_arith_scalar.h}                    |   0
 .../Tensor_arith_sub.h}                       |   0
 .../Tensor_class.h}                           |   0
 .../Tensor_extract_merge.h}                   |   0
 .../Tensor_inner.h}                           |   0
 .../Tensor_outer.h}                           |   0
 .../Tensor_peek.h}                            |   0
 .../Tensor_poke.h}                            |   0
 .../Tensor_reality.h}                         |   0
 .../Tensor_trace.h}                           |   0
 .../Tensor_traits.h}                          |   0
 .../Tensor_transpose.h}                       |   0
 tests/Test_cshift.cc                          |   1 -
 tests/Test_cshift_red_black.cc                |   1 -
 tests/Test_gamma.cc                           |   1 -
 tests/Test_nersc_io.cc                        |   1 -
 tests/Test_rng.cc                             |   1 -
 tests/Test_rng_fixed.cc                       |   1 -
 tests/Test_simd.cc                            |   1 -
 75 files changed, 85 insertions(+), 231 deletions(-)
 rename lib/{Grid_algorithms.h => Algorithms.h} (100%)
 rename lib/{Grid_aligned_allocator.h => AlignedAllocator.h} (100%)
 create mode 100644 lib/Cartesian.h
 rename lib/{Grid_communicator.h => Communicator.h} (57%)
 rename lib/{Grid_comparison.h => Comparison.h} (98%)
 rename lib/{Grid_cshift.h => Cshift.h} (51%)
 rename lib/{Grid_config.h => GridConfig.h} (100%)
 rename lib/{Grid_config.h.in => GridConfig.h.in} (97%)
 rename lib/{Grid_init.cc => GridInit.cc} (100%)
 delete mode 100644 lib/Grid_cartesian.h
 delete mode 100644 lib/Grid_math.h
 rename lib/{Grid_lattice.h => Lattice.h} (58%)
 rename lib/{Grid_simd.h => Simd.h} (100%)
 rename lib/{Grid_stencil.h => Stencil.h} (99%)
 create mode 100644 lib/Tensors.h
 rename lib/{Grid_threads.h => Threads.h} (100%)
 rename lib/cartesian/{Grid_cartesian_base.h => Cartesian_base.h} (99%)
 rename lib/cartesian/{Grid_cartesian_full.h => Cartesian_full.h} (100%)
 rename lib/cartesian/{Grid_cartesian_red_black.h => Cartesian_red_black.h} (100%)
 rename lib/communicator/{Grid_communicator_base.h => Communicator_base.h} (100%)
 rename lib/communicator/{Grid_communicator_mpi.cc => Communicator_mpi.cc} (100%)
 rename lib/communicator/{Grid_communicator_none.cc => Communicator_none.cc} (100%)
 rename lib/cshift/{Grid_cshift_common.h => Cshift_common.h} (100%)
 rename lib/cshift/{Grid_cshift_mpi.h => Cshift_mpi.h} (100%)
 rename lib/cshift/{Grid_cshift_none.h => Cshift_none.h} (100%)
 rename lib/lattice/{Grid_lattice_ET.h => Lattice_ET.h} (100%)
 rename lib/lattice/{Grid_lattice_arith.h => Lattice_arith.h} (100%)
 rename lib/lattice/{Grid_lattice_base.h => Lattice_base.h} (93%)
 rename lib/lattice/{Grid_lattice_comparison.h => Lattice_comparison.h} (100%)
 rename lib/lattice/{Grid_lattice_conformable.h => Lattice_conformable.h} (100%)
 rename lib/lattice/{Grid_lattice_coordinate.h => Lattice_coordinate.h} (100%)
 rename lib/lattice/{Grid_lattice_local.h => Lattice_local.h} (100%)
 rename lib/lattice/{Grid_lattice_overload.h => Lattice_overload.h} (100%)
 rename lib/lattice/{Grid_lattice_peekpoke.h => Lattice_peekpoke.h} (100%)
 rename lib/lattice/{Grid_lattice_reality.h => Lattice_reality.h} (100%)
 rename lib/lattice/{Grid_lattice_reduction.h => Lattice_reduction.h} (100%)
 rename lib/lattice/{Grid_lattice_rng.h => Lattice_rng.h} (100%)
 rename lib/lattice/{Grid_lattice_trace.h => Lattice_trace.h} (100%)
 rename lib/lattice/{Grid_lattice_transfer.h => Lattice_transfer.h} (100%)
 rename lib/lattice/{Grid_lattice_transpose.h => Lattice_transpose.h} (100%)
 rename lib/lattice/{Grid_lattice_where.h => Lattice_where.h} (100%)
 delete mode 100644 lib/math/Grid_math_arith.h
 rename lib/parallelIO/{GridNerscIO.h => NerscIO.h} (100%)
 rename lib/stencil/{Grid_lebesgue.cc => Lebesgue.cc} (100%)
 rename lib/stencil/{Grid_lebesgue.h => Lebesgue.h} (100%)
 rename lib/stencil/{Grid_stencil_common.cc => Stencil_common.cc} (100%)
 create mode 100644 lib/tensors/Tensor_arith.h
 rename lib/{math/Grid_math_arith_add.h => tensors/Tensor_arith_add.h} (100%)
 rename lib/{math/Grid_math_arith_mac.h => tensors/Tensor_arith_mac.h} (100%)
 rename lib/{math/Grid_math_arith_mul.h => tensors/Tensor_arith_mul.h} (100%)
 rename lib/{math/Grid_math_arith_scalar.h => tensors/Tensor_arith_scalar.h} (100%)
 rename lib/{math/Grid_math_arith_sub.h => tensors/Tensor_arith_sub.h} (100%)
 rename lib/{math/Grid_math_tensors.h => tensors/Tensor_class.h} (100%)
 rename lib/{Grid_extract.h => tensors/Tensor_extract_merge.h} (100%)
 rename lib/{math/Grid_math_inner.h => tensors/Tensor_inner.h} (100%)
 rename lib/{math/Grid_math_outer.h => tensors/Tensor_outer.h} (100%)
 rename lib/{math/Grid_math_peek.h => tensors/Tensor_peek.h} (100%)
 rename lib/{math/Grid_math_poke.h => tensors/Tensor_poke.h} (100%)
 rename lib/{math/Grid_math_reality.h => tensors/Tensor_reality.h} (100%)
 rename lib/{math/Grid_math_trace.h => tensors/Tensor_trace.h} (100%)
 rename lib/{math/Grid_math_traits.h => tensors/Tensor_traits.h} (100%)
 rename lib/{math/Grid_math_transpose.h => tensors/Tensor_transpose.h} (100%)

diff --git a/Makefile.am b/Makefile.am
index fc3f6a0a..3b1d5690 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,5 @@
 # additional include paths necessary to compile the C++ library
 AM_CXXFLAGS = -I$(top_srcdir)/
 SUBDIRS = lib tests benchmarks
+
+filelist: $(SUBDIRS)
\ No newline at end of file
diff --git a/configure b/configure
index 615ba987..20328052 100755
--- a/configure
+++ b/configure
@@ -3064,7 +3064,7 @@ fi
 
 
 
-ac_config_headers="$ac_config_headers lib/Grid_config.h"
+ac_config_headers="$ac_config_headers lib/GridConfig.h"
 
 # Check whether --enable-silent-rules was given.
 if test "${enable_silent_rules+set}" = set; then :
@@ -5814,7 +5814,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
-    "lib/Grid_config.h") CONFIG_HEADERS="$CONFIG_HEADERS lib/Grid_config.h" ;;
+    "lib/GridConfig.h") CONFIG_HEADERS="$CONFIG_HEADERS lib/GridConfig.h" ;;
     "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
     "docs/doxy.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxy.cfg" ;;
     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
diff --git a/configure.ac b/configure.ac
index 5dcbea36..03fd7900 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11,7 +11,7 @@ AC_CANONICAL_SYSTEM
 AM_INIT_AUTOMAKE(subdir-objects)
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_SRCDIR([lib/Grid.h])
-AC_CONFIG_HEADERS([lib/Grid_config.h])
+AC_CONFIG_HEADERS([lib/GridConfig.h])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
 AC_MSG_NOTICE([
diff --git a/lib/Grid_algorithms.h b/lib/Algorithms.h
similarity index 100%
rename from lib/Grid_algorithms.h
rename to lib/Algorithms.h
diff --git a/lib/Grid_aligned_allocator.h b/lib/AlignedAllocator.h
similarity index 100%
rename from lib/Grid_aligned_allocator.h
rename to lib/AlignedAllocator.h
diff --git a/lib/Cartesian.h b/lib/Cartesian.h
new file mode 100644
index 00000000..db384b53
--- /dev/null
+++ b/lib/Cartesian.h
@@ -0,0 +1,8 @@
+#ifndef GRID_CARTESIAN_H
+#define GRID_CARTESIAN_H
+
+#include <cartesian/Cartesian_base.h>
+#include <cartesian/Cartesian_full.h>
+#include <cartesian/Cartesian_red_black.h> 
+
+#endif
diff --git a/lib/Grid_communicator.h b/lib/Communicator.h
similarity index 57%
rename from lib/Grid_communicator.h
rename to lib/Communicator.h
index cfa6e0a7..6880adda 100644
--- a/lib/Grid_communicator.h
+++ b/lib/Communicator.h
@@ -1,6 +1,6 @@
 #ifndef GRID_COMMUNICATOR_H
 #define GRID_COMMUNICATOR_H
 
-#include <communicator/Grid_communicator_base.h>
+#include <communicator/Communicator_base.h>
 
 #endif
diff --git a/lib/Grid_comparison.h b/lib/Comparison.h
similarity index 98%
rename from lib/Grid_comparison.h
rename to lib/Comparison.h
index 3f9c206d..ecd6ece0 100644
--- a/lib/Grid_comparison.h
+++ b/lib/Comparison.h
@@ -141,7 +141,7 @@ namespace Grid {
     }
 }
 
-#include <lattice/Grid_lattice_comparison.h>
-#include <lattice/Grid_lattice_where.h>
+#include <lattice/Lattice_comparison.h>
+#include <lattice/Lattice_where.h>
 
 #endif
diff --git a/lib/Grid_cshift.h b/lib/Cshift.h
similarity index 51%
rename from lib/Grid_cshift.h
rename to lib/Cshift.h
index 10c7a3c4..3caccbf9 100644
--- a/lib/Grid_cshift.h
+++ b/lib/Cshift.h
@@ -1,13 +1,13 @@
 #ifndef _GRID_CSHIFT_H_
 #define _GRID_CSHIFT_H_
 
-#include <cshift/Grid_cshift_common.h>
+#include <cshift/Cshift_common.h>
 
 #ifdef GRID_COMMS_NONE
-#include <cshift/Grid_cshift_none.h>
+#include <cshift/Cshift_none.h>
 #endif
 
 #ifdef GRID_COMMS_MPI
-#include <cshift/Grid_cshift_mpi.h>
+#include <cshift/Cshift_mpi.h>
 #endif 
 #endif
diff --git a/lib/Grid.h b/lib/Grid.h
index 7fa56892..16530434 100644
--- a/lib/Grid.h
+++ b/lib/Grid.h
@@ -33,7 +33,7 @@
 
 #define strong_inline __attribute__((always_inline)) inline
 
-#include <Grid_config.h>
+#include <GridConfig.h>
 
 ////////////////////////////////////////////////////////////
 // Tunable header includes
@@ -46,22 +46,22 @@
 #include <malloc.h>
 #endif
 
-#include <Grid_aligned_allocator.h>
+#include <AlignedAllocator.h>
 
-#include <Grid_simd.h>
-#include <Grid_threads.h>
+#include <Simd.h>
+#include <Threads.h>
 
-#include <Grid_cartesian.h> // subdir aggregate
-#include <Grid_math.h>      // subdir aggregate
-#include <Grid_lattice.h>   // subdir aggregate
-#include <Grid_comparison.h>
-#include <Grid_cshift.h>    // subdir aggregate
-#include <Grid_stencil.h>   // subdir aggregate
-
-#include <Grid_algorithms.h>// subdir aggregate
+#include <Communicator.h> // subdir aggregate
+#include <Cartesian.h> // subdir aggregate
+#include <Tensors.h>   // subdir aggregate
+#include <Lattice.h>   // subdir aggregate
+#include <Comparison.h>
+#include <Cshift.h>    // subdir aggregate
+#include <Stencil.h>   // subdir aggregate
+#include <Algorithms.h>// subdir aggregate
 
 #include <qcd/QCD.h>
-#include <parallelIO/GridNerscIO.h>
+#include <parallelIO/NerscIO.h>
 
 namespace Grid {
 
diff --git a/lib/Grid_config.h b/lib/GridConfig.h
similarity index 100%
rename from lib/Grid_config.h
rename to lib/GridConfig.h
diff --git a/lib/Grid_config.h.in b/lib/GridConfig.h.in
similarity index 97%
rename from lib/Grid_config.h.in
rename to lib/GridConfig.h.in
index 6f05d6cb..03f91ff4 100644
--- a/lib/Grid_config.h.in
+++ b/lib/GridConfig.h.in
@@ -1,4 +1,4 @@
-/* lib/Grid_config.h.in.  Generated from configure.ac by autoheader.  */
+/* lib/GridConfig.h.in.  Generated from configure.ac by autoheader.  */
 
 /* AVX */
 #undef AVX1
diff --git a/lib/Grid_init.cc b/lib/GridInit.cc
similarity index 100%
rename from lib/Grid_init.cc
rename to lib/GridInit.cc
diff --git a/lib/Grid_cartesian.h b/lib/Grid_cartesian.h
deleted file mode 100644
index c01be20a..00000000
--- a/lib/Grid_cartesian.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef GRID_CARTESIAN_H
-#define GRID_CARTESIAN_H
-
-#include <cartesian/Grid_cartesian_base.h>
-#include <cartesian/Grid_cartesian_full.h>
-#include <cartesian/Grid_cartesian_red_black.h> 
-
-#endif
diff --git a/lib/Grid_math.h b/lib/Grid_math.h
deleted file mode 100644
index 17bc09a5..00000000
--- a/lib/Grid_math.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef GRID_MATH_H
-#define GRID_MATH_H
-
-#include <math/Grid_math_traits.h>
-#include <math/Grid_math_tensors.h>
-#include <math/Grid_math_arith.h>
-#include <math/Grid_math_inner.h>
-#include <math/Grid_math_outer.h>
-#include <math/Grid_math_transpose.h>
-#include <math/Grid_math_trace.h>
-#include <math/Grid_math_peek.h>
-#include <math/Grid_math_poke.h>
-#include <math/Grid_math_reality.h>
-
-    
-#endif
diff --git a/lib/Grid_lattice.h b/lib/Lattice.h
similarity index 58%
rename from lib/Grid_lattice.h
rename to lib/Lattice.h
index 35664aee..1f29a908 100644
--- a/lib/Grid_lattice.h
+++ b/lib/Lattice.h
@@ -1,6 +1,6 @@
 #ifndef GRID_LATTICE_H
 #define GRID_LATTICE_H
 
-#include <lattice/Grid_lattice_base.h>
+#include <lattice/Lattice_base.h>
 
 #endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index b688d8b2..7ee52e0e 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -3,119 +3,26 @@ AM_CXXFLAGS = -I$(top_srcdir)/
 
 extra_sources=
 if BUILD_COMMS_MPI
-  extra_sources+=communicator/Grid_communicator_mpi.cc
+  extra_sources+=communicator/Communicator_mpi.cc
 endif
 
 if BUILD_COMMS_NONE
-  extra_sources+=communicator/Grid_communicator_none.cc
+  extra_sources+=communicator/Communicator_none.cc
 endif
 
 #
 # Libraries
 #
-lib_LIBRARIES = libGrid.a
-libGrid_a_SOURCES =				\
-	Grid_init.cc				\
-	stencil/Grid_lebesgue.cc		\
-	stencil/Grid_stencil_common.cc		\
-	algorithms/approx/Zolotarev.cc		\
-	algorithms/approx/Remez.cc		\
-	qcd/SpaceTimeGrid.cc\
-	qcd/Dirac.cc\
-	qcd/action/fermion/WilsonKernels.cc\
-	qcd/action/fermion/WilsonKernelsHand.cc\
-	qcd/action/fermion/WilsonFermion.cc\
-	qcd/action/fermion/WilsonFermion5D.cc\
-	qcd/action/fermion/CayleyFermion5D.cc \
-	qcd/action/fermion/ContinuedFractionFermion5D.cc	\
-	$(extra_sources)
 
-#	qcd/action/fermion/PartialFractionFermion5D.cc	\
+include Make.inc
+
+lib_LIBRARIES = libGrid.a
+libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
+
+
+#	qcd/action/fermion/PartialFractionFermion5D.cc\	\
 #
 # Include files
 #
-nobase_include_HEADERS=\
-		./algorithms/approx/bigfloat.h\
-		./algorithms/approx/bigfloat_double.h\
-		./algorithms/approx/Chebyshev.h\
-		./algorithms/approx/Remez.h\
-		./algorithms/approx/Zolotarev.h\
-		./algorithms/iterative/ConjugateGradient.h\
-		./algorithms/iterative/NormalEquations.h\
-		./algorithms/iterative/SchurRedBlack.h\
-		./algorithms/LinearOperator.h\
-		./algorithms/SparseMatrix.h\
-		./cartesian/Grid_cartesian_base.h\
-		./cartesian/Grid_cartesian_full.h\
-		./cartesian/Grid_cartesian_red_black.h\
-		./communicator/Grid_communicator_base.h\
-		./cshift/Grid_cshift_common.h\
-		./cshift/Grid_cshift_mpi.h\
-		./cshift/Grid_cshift_none.h\
-		./Grid.h\
-		./Grid_algorithms.h\
-		./Grid_aligned_allocator.h\
-		./Grid_cartesian.h\
-		./Grid_communicator.h\
-		./Grid_comparison.h\
-		./Grid_config.h\
-		./Grid_cshift.h\
-		./Grid_extract.h\
-		./Grid_lattice.h\
-		./Grid_math.h\
-		./Grid_simd.h\
-		./Grid_stencil.h\
-		./Grid_threads.h\
-		./lattice/Grid_lattice_arith.h\
-		./lattice/Grid_lattice_base.h\
-		./lattice/Grid_lattice_comparison.h\
-		./lattice/Grid_lattice_conformable.h\
-		./lattice/Grid_lattice_coordinate.h\
-		./lattice/Grid_lattice_ET.h\
-		./lattice/Grid_lattice_local.h\
-		./lattice/Grid_lattice_overload.h\
-		./lattice/Grid_lattice_peekpoke.h\
-		./lattice/Grid_lattice_reality.h\
-		./lattice/Grid_lattice_reduction.h\
-		./lattice/Grid_lattice_rng.h\
-		./lattice/Grid_lattice_trace.h\
-		./lattice/Grid_lattice_transfer.h\
-		./lattice/Grid_lattice_transpose.h\
-		./lattice/Grid_lattice_where.h\
-		./math/Grid_math_arith.h\
-		./math/Grid_math_arith_add.h\
-		./math/Grid_math_arith_mac.h\
-		./math/Grid_math_arith_mul.h\
-		./math/Grid_math_arith_scalar.h\
-		./math/Grid_math_arith_sub.h\
-		./math/Grid_math_inner.h\
-		./math/Grid_math_outer.h\
-		./math/Grid_math_peek.h\
-		./math/Grid_math_poke.h\
-		./math/Grid_math_reality.h\
-		./math/Grid_math_tensors.h\
-		./math/Grid_math_trace.h\
-		./math/Grid_math_traits.h\
-		./math/Grid_math_transpose.h\
-		./parallelIO/GridNerscIO.h\
-		./qcd/action/Actions.h\
-		./qcd/action/fermion/FermionOperator.h\
-		./qcd/action/fermion/WilsonCompressor.h\
-		./qcd/action/fermion/WilsonKernels.h\
-		./qcd/action/fermion/WilsonFermion.h\
-		./qcd/action/fermion/WilsonFermion5D.h\
-		./qcd/Dirac.h\
-		./qcd/QCD.h\
-		./qcd/TwoSpinor.h\
-		./simd/Grid_avx.h\
-		./simd/Grid_avx512.h\
-		./simd/Grid_qpx.h\
-		./simd/Grid_sse4.h\
-		./simd/Grid_vector_types.h\
-		./simd/Old/Grid_vComplexD.h\
-		./simd/Old/Grid_vComplexF.h\
-		./simd/Old/Grid_vInteger.h\
-		./simd/Old/Grid_vRealD.h\
-		./simd/Old/Grid_vRealF.h\
-		./stencil/Grid_lebesgue.h
+nobase_include_HEADERS=$(HFILES)
 
diff --git a/lib/Grid_simd.h b/lib/Simd.h
similarity index 100%
rename from lib/Grid_simd.h
rename to lib/Simd.h
diff --git a/lib/Grid_stencil.h b/lib/Stencil.h
similarity index 99%
rename from lib/Grid_stencil.h
rename to lib/Stencil.h
index 50d22453..8529e73a 100644
--- a/lib/Grid_stencil.h
+++ b/lib/Stencil.h
@@ -1,7 +1,7 @@
 #ifndef GRID_STENCIL_H
 #define GRID_STENCIL_H
 
-#include <stencil/Grid_lebesgue.h>   // subdir aggregate
+#include <stencil/Lebesgue.h>   // subdir aggregate
 
 //////////////////////////////////////////////////////////////////////////////////////////
 // Must not lose sight that goal is to be able to construct really efficient
diff --git a/lib/Tensors.h b/lib/Tensors.h
new file mode 100644
index 00000000..e812149f
--- /dev/null
+++ b/lib/Tensors.h
@@ -0,0 +1,16 @@
+#ifndef GRID_MATH_H
+#define GRID_MATH_H
+
+#include <tensors/Tensor_traits.h>
+#include <tensors/Tensor_class.h>
+#include <tensors/Tensor_arith.h>
+#include <tensors/Tensor_inner.h>
+#include <tensors/Tensor_outer.h>
+#include <tensors/Tensor_transpose.h>
+#include <tensors/Tensor_trace.h>
+#include <tensors/Tensor_peek.h>
+#include <tensors/Tensor_poke.h>
+#include <tensors/Tensor_reality.h>
+#include <tensors/Tensor_extract_merge.h>
+    
+#endif
diff --git a/lib/Grid_threads.h b/lib/Threads.h
similarity index 100%
rename from lib/Grid_threads.h
rename to lib/Threads.h
diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Cartesian_base.h
similarity index 99%
rename from lib/cartesian/Grid_cartesian_base.h
rename to lib/cartesian/Cartesian_base.h
index 66339648..6303e38e 100644
--- a/lib/cartesian/Grid_cartesian_base.h
+++ b/lib/cartesian/Cartesian_base.h
@@ -2,7 +2,6 @@
 #define GRID_CARTESIAN_BASE_H
 
 #include <Grid.h>
-#include <Grid_communicator.h>
 
 namespace Grid{
 
diff --git a/lib/cartesian/Grid_cartesian_full.h b/lib/cartesian/Cartesian_full.h
similarity index 100%
rename from lib/cartesian/Grid_cartesian_full.h
rename to lib/cartesian/Cartesian_full.h
diff --git a/lib/cartesian/Grid_cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h
similarity index 100%
rename from lib/cartesian/Grid_cartesian_red_black.h
rename to lib/cartesian/Cartesian_red_black.h
diff --git a/lib/communicator/Grid_communicator_base.h b/lib/communicator/Communicator_base.h
similarity index 100%
rename from lib/communicator/Grid_communicator_base.h
rename to lib/communicator/Communicator_base.h
diff --git a/lib/communicator/Grid_communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc
similarity index 100%
rename from lib/communicator/Grid_communicator_mpi.cc
rename to lib/communicator/Communicator_mpi.cc
diff --git a/lib/communicator/Grid_communicator_none.cc b/lib/communicator/Communicator_none.cc
similarity index 100%
rename from lib/communicator/Grid_communicator_none.cc
rename to lib/communicator/Communicator_none.cc
diff --git a/lib/cshift/Grid_cshift_common.h b/lib/cshift/Cshift_common.h
similarity index 100%
rename from lib/cshift/Grid_cshift_common.h
rename to lib/cshift/Cshift_common.h
diff --git a/lib/cshift/Grid_cshift_mpi.h b/lib/cshift/Cshift_mpi.h
similarity index 100%
rename from lib/cshift/Grid_cshift_mpi.h
rename to lib/cshift/Cshift_mpi.h
diff --git a/lib/cshift/Grid_cshift_none.h b/lib/cshift/Cshift_none.h
similarity index 100%
rename from lib/cshift/Grid_cshift_none.h
rename to lib/cshift/Cshift_none.h
diff --git a/lib/lattice/Grid_lattice_ET.h b/lib/lattice/Lattice_ET.h
similarity index 100%
rename from lib/lattice/Grid_lattice_ET.h
rename to lib/lattice/Lattice_ET.h
diff --git a/lib/lattice/Grid_lattice_arith.h b/lib/lattice/Lattice_arith.h
similarity index 100%
rename from lib/lattice/Grid_lattice_arith.h
rename to lib/lattice/Lattice_arith.h
diff --git a/lib/lattice/Grid_lattice_base.h b/lib/lattice/Lattice_base.h
similarity index 93%
rename from lib/lattice/Grid_lattice_base.h
rename to lib/lattice/Lattice_base.h
index 15ef0944..6b5fe261 100644
--- a/lib/lattice/Grid_lattice_base.h
+++ b/lib/lattice/Lattice_base.h
@@ -283,24 +283,23 @@ PARALLEL_FOR_LOOP
 
 
 
-#include <lattice/Grid_lattice_conformable.h>
+#include <lattice/Lattice_conformable.h>
 #define GRID_LATTICE_EXPRESSION_TEMPLATES
 #ifdef  GRID_LATTICE_EXPRESSION_TEMPLATES
-#include <lattice/Grid_lattice_ET.h>
+#include <lattice/Lattice_ET.h>
 #else 
-#include <lattice/Grid_lattice_overload.h>
+#include <lattice/Lattice_overload.h>
 #endif
-#include <lattice/Grid_lattice_arith.h>
-#include <lattice/Grid_lattice_trace.h>
-#include <lattice/Grid_lattice_transpose.h>
-#include <lattice/Grid_lattice_local.h>
-#include <lattice/Grid_lattice_reduction.h>
-#include <lattice/Grid_lattice_peekpoke.h>
-#include <lattice/Grid_lattice_reality.h>
-#include <Grid_extract.h>
-#include <lattice/Grid_lattice_coordinate.h>
-#include <lattice/Grid_lattice_rng.h>
-#include <lattice/Grid_lattice_transfer.h>
+#include <lattice/Lattice_arith.h>
+#include <lattice/Lattice_trace.h>
+#include <lattice/Lattice_transpose.h>
+#include <lattice/Lattice_local.h>
+#include <lattice/Lattice_reduction.h>
+#include <lattice/Lattice_peekpoke.h>
+#include <lattice/Lattice_reality.h>
+#include <lattice/Lattice_coordinate.h>
+#include <lattice/Lattice_rng.h>
+#include <lattice/Lattice_transfer.h>
 
 
 
diff --git a/lib/lattice/Grid_lattice_comparison.h b/lib/lattice/Lattice_comparison.h
similarity index 100%
rename from lib/lattice/Grid_lattice_comparison.h
rename to lib/lattice/Lattice_comparison.h
diff --git a/lib/lattice/Grid_lattice_conformable.h b/lib/lattice/Lattice_conformable.h
similarity index 100%
rename from lib/lattice/Grid_lattice_conformable.h
rename to lib/lattice/Lattice_conformable.h
diff --git a/lib/lattice/Grid_lattice_coordinate.h b/lib/lattice/Lattice_coordinate.h
similarity index 100%
rename from lib/lattice/Grid_lattice_coordinate.h
rename to lib/lattice/Lattice_coordinate.h
diff --git a/lib/lattice/Grid_lattice_local.h b/lib/lattice/Lattice_local.h
similarity index 100%
rename from lib/lattice/Grid_lattice_local.h
rename to lib/lattice/Lattice_local.h
diff --git a/lib/lattice/Grid_lattice_overload.h b/lib/lattice/Lattice_overload.h
similarity index 100%
rename from lib/lattice/Grid_lattice_overload.h
rename to lib/lattice/Lattice_overload.h
diff --git a/lib/lattice/Grid_lattice_peekpoke.h b/lib/lattice/Lattice_peekpoke.h
similarity index 100%
rename from lib/lattice/Grid_lattice_peekpoke.h
rename to lib/lattice/Lattice_peekpoke.h
diff --git a/lib/lattice/Grid_lattice_reality.h b/lib/lattice/Lattice_reality.h
similarity index 100%
rename from lib/lattice/Grid_lattice_reality.h
rename to lib/lattice/Lattice_reality.h
diff --git a/lib/lattice/Grid_lattice_reduction.h b/lib/lattice/Lattice_reduction.h
similarity index 100%
rename from lib/lattice/Grid_lattice_reduction.h
rename to lib/lattice/Lattice_reduction.h
diff --git a/lib/lattice/Grid_lattice_rng.h b/lib/lattice/Lattice_rng.h
similarity index 100%
rename from lib/lattice/Grid_lattice_rng.h
rename to lib/lattice/Lattice_rng.h
diff --git a/lib/lattice/Grid_lattice_trace.h b/lib/lattice/Lattice_trace.h
similarity index 100%
rename from lib/lattice/Grid_lattice_trace.h
rename to lib/lattice/Lattice_trace.h
diff --git a/lib/lattice/Grid_lattice_transfer.h b/lib/lattice/Lattice_transfer.h
similarity index 100%
rename from lib/lattice/Grid_lattice_transfer.h
rename to lib/lattice/Lattice_transfer.h
diff --git a/lib/lattice/Grid_lattice_transpose.h b/lib/lattice/Lattice_transpose.h
similarity index 100%
rename from lib/lattice/Grid_lattice_transpose.h
rename to lib/lattice/Lattice_transpose.h
diff --git a/lib/lattice/Grid_lattice_where.h b/lib/lattice/Lattice_where.h
similarity index 100%
rename from lib/lattice/Grid_lattice_where.h
rename to lib/lattice/Lattice_where.h
diff --git a/lib/math/Grid_math_arith.h b/lib/math/Grid_math_arith.h
deleted file mode 100644
index ca90ba88..00000000
--- a/lib/math/Grid_math_arith.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef GRID_MATH_ARITH_H
-#define GRID_MATH_ARITH_H
-
-#include <math/Grid_math_arith_add.h>
-#include <math/Grid_math_arith_sub.h>
-#include <math/Grid_math_arith_mac.h>
-#include <math/Grid_math_arith_mul.h>
-#include <math/Grid_math_arith_scalar.h>
-
-#endif
-
diff --git a/lib/parallelIO/GridNerscIO.h b/lib/parallelIO/NerscIO.h
similarity index 100%
rename from lib/parallelIO/GridNerscIO.h
rename to lib/parallelIO/NerscIO.h
diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc
index 21f62a75..8b137891 100644
--- a/lib/qcd/action/fermion/PartialFractionFermion5D.cc
+++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc
@@ -1,47 +1 @@
-#ifndef  GRID_QCD_PARTIAL_FRACTION_H
-#define  GRID_QCD_PARTIAL_FRACTION_H
 
-namespace Grid {
-
-  namespace QCD {
-
-    class PartialFractionFermion5D : public WilsonFermion5D
-    {
-    public:
-
-      // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
-
-      // half checkerboard operaions
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
-
-    private:
-
-      zolotarev_data *zdata;
-
-      // Part frac
-      double R=(1+this->mass)/(1-this->mass);
-      std::vector<double> p; 
-      std::vector<double> q;
-
-      // Constructors
-      PartialFractionFermion5D(LatticeGaugeField &_Umu,
-				    GridCartesian         &FiveDimGrid,
-				    GridRedBlackCartesian &FiveDimRedBlackGrid,
-				    GridCartesian         &FourDimGrid,
-				    GridRedBlackCartesian &FourDimRedBlackGrid,
-				    RealD _mass,RealD M5);
-
-    };
-
-
-  }
-}
-
-#endif
diff --git a/lib/stencil/Grid_lebesgue.cc b/lib/stencil/Lebesgue.cc
similarity index 100%
rename from lib/stencil/Grid_lebesgue.cc
rename to lib/stencil/Lebesgue.cc
diff --git a/lib/stencil/Grid_lebesgue.h b/lib/stencil/Lebesgue.h
similarity index 100%
rename from lib/stencil/Grid_lebesgue.h
rename to lib/stencil/Lebesgue.h
diff --git a/lib/stencil/Grid_stencil_common.cc b/lib/stencil/Stencil_common.cc
similarity index 100%
rename from lib/stencil/Grid_stencil_common.cc
rename to lib/stencil/Stencil_common.cc
diff --git a/lib/tensors/Tensor_arith.h b/lib/tensors/Tensor_arith.h
new file mode 100644
index 00000000..853a19a5
--- /dev/null
+++ b/lib/tensors/Tensor_arith.h
@@ -0,0 +1,11 @@
+#ifndef GRID_MATH_ARITH_H
+#define GRID_MATH_ARITH_H
+
+#include <tensors/Tensor_arith_add.h>
+#include <tensors/Tensor_arith_sub.h>
+#include <tensors/Tensor_arith_mac.h>
+#include <tensors/Tensor_arith_mul.h>
+#include <tensors/Tensor_arith_scalar.h>
+
+#endif
+
diff --git a/lib/math/Grid_math_arith_add.h b/lib/tensors/Tensor_arith_add.h
similarity index 100%
rename from lib/math/Grid_math_arith_add.h
rename to lib/tensors/Tensor_arith_add.h
diff --git a/lib/math/Grid_math_arith_mac.h b/lib/tensors/Tensor_arith_mac.h
similarity index 100%
rename from lib/math/Grid_math_arith_mac.h
rename to lib/tensors/Tensor_arith_mac.h
diff --git a/lib/math/Grid_math_arith_mul.h b/lib/tensors/Tensor_arith_mul.h
similarity index 100%
rename from lib/math/Grid_math_arith_mul.h
rename to lib/tensors/Tensor_arith_mul.h
diff --git a/lib/math/Grid_math_arith_scalar.h b/lib/tensors/Tensor_arith_scalar.h
similarity index 100%
rename from lib/math/Grid_math_arith_scalar.h
rename to lib/tensors/Tensor_arith_scalar.h
diff --git a/lib/math/Grid_math_arith_sub.h b/lib/tensors/Tensor_arith_sub.h
similarity index 100%
rename from lib/math/Grid_math_arith_sub.h
rename to lib/tensors/Tensor_arith_sub.h
diff --git a/lib/math/Grid_math_tensors.h b/lib/tensors/Tensor_class.h
similarity index 100%
rename from lib/math/Grid_math_tensors.h
rename to lib/tensors/Tensor_class.h
diff --git a/lib/Grid_extract.h b/lib/tensors/Tensor_extract_merge.h
similarity index 100%
rename from lib/Grid_extract.h
rename to lib/tensors/Tensor_extract_merge.h
diff --git a/lib/math/Grid_math_inner.h b/lib/tensors/Tensor_inner.h
similarity index 100%
rename from lib/math/Grid_math_inner.h
rename to lib/tensors/Tensor_inner.h
diff --git a/lib/math/Grid_math_outer.h b/lib/tensors/Tensor_outer.h
similarity index 100%
rename from lib/math/Grid_math_outer.h
rename to lib/tensors/Tensor_outer.h
diff --git a/lib/math/Grid_math_peek.h b/lib/tensors/Tensor_peek.h
similarity index 100%
rename from lib/math/Grid_math_peek.h
rename to lib/tensors/Tensor_peek.h
diff --git a/lib/math/Grid_math_poke.h b/lib/tensors/Tensor_poke.h
similarity index 100%
rename from lib/math/Grid_math_poke.h
rename to lib/tensors/Tensor_poke.h
diff --git a/lib/math/Grid_math_reality.h b/lib/tensors/Tensor_reality.h
similarity index 100%
rename from lib/math/Grid_math_reality.h
rename to lib/tensors/Tensor_reality.h
diff --git a/lib/math/Grid_math_trace.h b/lib/tensors/Tensor_trace.h
similarity index 100%
rename from lib/math/Grid_math_trace.h
rename to lib/tensors/Tensor_trace.h
diff --git a/lib/math/Grid_math_traits.h b/lib/tensors/Tensor_traits.h
similarity index 100%
rename from lib/math/Grid_math_traits.h
rename to lib/tensors/Tensor_traits.h
diff --git a/lib/math/Grid_math_transpose.h b/lib/tensors/Tensor_transpose.h
similarity index 100%
rename from lib/math/Grid_math_transpose.h
rename to lib/tensors/Tensor_transpose.h
diff --git a/tests/Test_cshift.cc b/tests/Test_cshift.cc
index 76125977..cc1da333 100644
--- a/tests/Test_cshift.cc
+++ b/tests/Test_cshift.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace Grid;
 using namespace Grid::QCD;
diff --git a/tests/Test_cshift_red_black.cc b/tests/Test_cshift_red_black.cc
index 212e9b6e..9ffa66b1 100644
--- a/tests/Test_cshift_red_black.cc
+++ b/tests/Test_cshift_red_black.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace Grid;
 using namespace Grid::QCD;
diff --git a/tests/Test_gamma.cc b/tests/Test_gamma.cc
index e803029b..bae77ecc 100644
--- a/tests/Test_gamma.cc
+++ b/tests/Test_gamma.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace std;
 using namespace Grid;
diff --git a/tests/Test_nersc_io.cc b/tests/Test_nersc_io.cc
index 6fe587a6..80d78291 100644
--- a/tests/Test_nersc_io.cc
+++ b/tests/Test_nersc_io.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace std;
 using namespace Grid;
diff --git a/tests/Test_rng.cc b/tests/Test_rng.cc
index 97f6c6b7..1731b740 100644
--- a/tests/Test_rng.cc
+++ b/tests/Test_rng.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace std;
 using namespace Grid;
diff --git a/tests/Test_rng_fixed.cc b/tests/Test_rng_fixed.cc
index 01e3315b..c836c93f 100644
--- a/tests/Test_rng_fixed.cc
+++ b/tests/Test_rng_fixed.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace std;
 using namespace Grid;
diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc
index db600fe4..f4858d06 100644
--- a/tests/Test_simd.cc
+++ b/tests/Test_simd.cc
@@ -1,5 +1,4 @@
 #include <Grid.h>
-#include <parallelIO/GridNerscIO.h>
 
 using namespace std;
 using namespace Grid;

From 98dcb6831bc110e85106168453d3c2400d9ff2c7 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 12:47:46 +0100
Subject: [PATCH 15/20] Convenience script to build the list of headers and .cc
 files in the library

---
 scripts/filelist | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100755 scripts/filelist

diff --git a/scripts/filelist b/scripts/filelist
new file mode 100755
index 00000000..73c6db51
--- /dev/null
+++ b/scripts/filelist
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+cd lib
+HFILES=`find . -type f -name '*.h'`
+CCFILES=`find . -type f -name '*.cc' -not  -name '*ommunicator*.cc'`
+echo HFILES=$HFILES > Make.inc
+echo >> Make.inc
+echo CCFILES=$CCFILES >> Make.inc
+cd ..

From 4ef11d96e958627deda5c06dabce9d1a5d2b5bfc Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 12:49:36 +0100
Subject: [PATCH 16/20] Make.inc needed in repo

---
 lib/Make.inc | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 lib/Make.inc

diff --git a/lib/Make.inc b/lib/Make.inc
new file mode 100644
index 00000000..31683256
--- /dev/null
+++ b/lib/Make.inc
@@ -0,0 +1,3 @@
+HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h 
./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h
+
+CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc

From eaa3e6aaf6aff695eb8bc60e384133864092a0d0 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Wed, 3 Jun 2015 13:07:00 +0100
Subject: [PATCH 17/20] Assist for generating file lists contained in Make.inc
 files for convenience when things are added

---
 benchmarks/Make.inc                           |  23 +++
 benchmarks/Makefile.am                        |  23 +--
 ...nchmark_su3_expr.cc => simple_su3_expr.cc} |   0
 ...nchmark_su3_test.cc => simple_su3_test.cc} |   0
 lib/Make.inc                                  |   1 +
 scripts/filelist                              |  49 +++++-
 tests/Make.inc                                |  83 +++++++++
 tests/Makefile.am                             |  89 +---------
 tests/Test_simd_new.cc                        | 165 ------------------
 9 files changed, 157 insertions(+), 276 deletions(-)
 create mode 100644 benchmarks/Make.inc
 rename benchmarks/{Benchmark_su3_expr.cc => simple_su3_expr.cc} (100%)
 rename benchmarks/{Benchmark_su3_test.cc => simple_su3_test.cc} (100%)
 create mode 100644 tests/Make.inc
 delete mode 100644 tests/Test_simd_new.cc

diff --git a/benchmarks/Make.inc b/benchmarks/Make.inc
new file mode 100644
index 00000000..9871149c
--- /dev/null
+++ b/benchmarks/Make.inc
@@ -0,0 +1,23 @@
+
+bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson
+
+
+Benchmark_comms_SOURCES=Benchmark_comms.cc
+Benchmark_comms_LDADD=-lGrid
+
+
+Benchmark_dwf_SOURCES=Benchmark_dwf.cc
+Benchmark_dwf_LDADD=-lGrid
+
+
+Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
+Benchmark_memory_bandwidth_LDADD=-lGrid
+
+
+Benchmark_su3_SOURCES=Benchmark_su3.cc
+Benchmark_su3_LDADD=-lGrid
+
+
+Benchmark_wilson_SOURCES=Benchmark_wilson.cc
+Benchmark_wilson_LDADD=-lGrid
+
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index 74ff03c6..8081844f 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -5,25 +5,4 @@ AM_LDFLAGS = -L$(top_builddir)/lib
 #
 # Test code
 #
-bin_PROGRAMS = \
-	Benchmark_comms \
-	Benchmark_memory_bandwidth \
-	Benchmark_su3 \
-	Benchmark_wilson \
-	Benchmark_dwf
-
-Benchmark_comms_SOURCES = Benchmark_comms.cc
-Benchmark_comms_LDADD = -lGrid
-
-Benchmark_memory_bandwidth_SOURCES = Benchmark_memory_bandwidth.cc
-Benchmark_memory_bandwidth_LDADD = -lGrid
-
-Benchmark_su3_SOURCES = Benchmark_su3.cc Benchmark_su3_test.cc Benchmark_su3_expr.cc
-Benchmark_su3_LDADD = -lGrid
-
-Benchmark_wilson_SOURCES = Benchmark_wilson.cc
-Benchmark_wilson_LDADD = -lGrid
-
-Benchmark_dwf_SOURCES = Benchmark_dwf.cc
-Benchmark_dwf_LDADD = -lGrid
-
+include Make.inc
diff --git a/benchmarks/Benchmark_su3_expr.cc b/benchmarks/simple_su3_expr.cc
similarity index 100%
rename from benchmarks/Benchmark_su3_expr.cc
rename to benchmarks/simple_su3_expr.cc
diff --git a/benchmarks/Benchmark_su3_test.cc b/benchmarks/simple_su3_test.cc
similarity index 100%
rename from benchmarks/Benchmark_su3_test.cc
rename to benchmarks/simple_su3_test.cc
diff --git a/lib/Make.inc b/lib/Make.inc
index 31683256..dd5269ac 100644
--- a/lib/Make.inc
+++ b/lib/Make.inc
@@ -1,3 +1,4 @@
+
 HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h 
./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h
 
 CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc
diff --git a/scripts/filelist b/scripts/filelist
index 73c6db51..1b575611 100755
--- a/scripts/filelist
+++ b/scripts/filelist
@@ -1,9 +1,56 @@
 #!/bin/bash
 
 cd lib
+
 HFILES=`find . -type f -name '*.h'`
 CCFILES=`find . -type f -name '*.cc' -not  -name '*ommunicator*.cc'`
-echo HFILES=$HFILES > Make.inc
+echo> Make.inc
+echo HFILES=$HFILES >> Make.inc
 echo >> Make.inc
 echo CCFILES=$CCFILES >> Make.inc
+
 cd ..
+
+
+
+cd tests || exit 1
+# Regenerate tests/Make.inc with one automake stanza per T*.cc source.
+# The sed dot is escaped so only a literal ".cc" is stripped from each name.
+TESTS=`ls T*.cc`
+TESTLIST=`echo ${TESTS} | sed 's/\.cc//g'`
+
+echo > Make.inc
+echo bin_PROGRAMS = ${TESTLIST} >> Make.inc
+echo >> Make.inc
+
+for f in $TESTS
+do
+BNAME=`basename $f .cc`
+echo >> Make.inc
+echo ${BNAME}_SOURCES=$f  >> Make.inc
+echo ${BNAME}_LDADD=-lGrid>> Make.inc
+echo >> Make.inc
+done
+
+cd ..
+
+
+cd benchmarks || exit 1
+# Regenerate benchmarks/Make.inc with one automake stanza per B*.cc source.
+# Stop above if the directory is missing rather than clobbering ./Make.inc.
+
+TESTS=`ls B*.cc`
+TESTLIST=`echo ${TESTS} | sed 's/\.cc//g'`
+
+echo > Make.inc
+echo bin_PROGRAMS = ${TESTLIST} >> Make.inc
+echo >> Make.inc
+
+for f in $TESTS
+do
+BNAME=`basename $f .cc`
+echo >> Make.inc
+echo ${BNAME}_SOURCES=$f  >> Make.inc
+echo ${BNAME}_LDADD=-lGrid>> Make.inc
+echo >> Make.inc
+done
diff --git a/tests/Make.inc b/tests/Make.inc
new file mode 100644
index 00000000..d592f218
--- /dev/null
+++ b/tests/Make.inc
@@ -0,0 +1,83 @@
+
+bin_PROGRAMS = Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_many_cg Test_many_evenodd Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_evenodd
+
+
+Test_cshift_SOURCES=Test_cshift.cc
+Test_cshift_LDADD=-lGrid
+
+
+Test_cshift_red_black_SOURCES=Test_cshift_red_black.cc
+Test_cshift_red_black_LDADD=-lGrid
+
+
+Test_dwf_cg_prec_SOURCES=Test_dwf_cg_prec.cc
+Test_dwf_cg_prec_LDADD=-lGrid
+
+
+Test_dwf_cg_schur_SOURCES=Test_dwf_cg_schur.cc
+Test_dwf_cg_schur_LDADD=-lGrid
+
+
+Test_dwf_cg_unprec_SOURCES=Test_dwf_cg_unprec.cc
+Test_dwf_cg_unprec_LDADD=-lGrid
+
+
+Test_dwf_even_odd_SOURCES=Test_dwf_even_odd.cc
+Test_dwf_even_odd_LDADD=-lGrid
+
+
+Test_gamma_SOURCES=Test_gamma.cc
+Test_gamma_LDADD=-lGrid
+
+
+Test_main_SOURCES=Test_main.cc
+Test_main_LDADD=-lGrid
+
+
+Test_many_cg_SOURCES=Test_many_cg.cc
+Test_many_cg_LDADD=-lGrid
+
+
+Test_many_evenodd_SOURCES=Test_many_evenodd.cc
+Test_many_evenodd_LDADD=-lGrid
+
+
+Test_nersc_io_SOURCES=Test_nersc_io.cc
+Test_nersc_io_LDADD=-lGrid
+
+
+Test_remez_SOURCES=Test_remez.cc
+Test_remez_LDADD=-lGrid
+
+
+Test_rng_SOURCES=Test_rng.cc
+Test_rng_LDADD=-lGrid
+
+
+Test_rng_fixed_SOURCES=Test_rng_fixed.cc
+Test_rng_fixed_LDADD=-lGrid
+
+
+Test_simd_SOURCES=Test_simd.cc
+Test_simd_LDADD=-lGrid
+
+
+Test_stencil_SOURCES=Test_stencil.cc
+Test_stencil_LDADD=-lGrid
+
+
+Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc
+Test_wilson_cg_prec_LDADD=-lGrid
+
+
+Test_wilson_cg_schur_SOURCES=Test_wilson_cg_schur.cc
+Test_wilson_cg_schur_LDADD=-lGrid
+
+
+Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc
+Test_wilson_cg_unprec_LDADD=-lGrid
+
+
+Test_wilson_evenodd_SOURCES=Test_wilson_evenodd.cc
+Test_wilson_evenodd_LDADD=-lGrid
+
diff --git a/tests/Makefile.am b/tests/Makefile.am
index bb3448f1..83385001 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -2,91 +2,4 @@
 AM_CXXFLAGS = -I$(top_srcdir)/lib
 AM_LDFLAGS = -L$(top_builddir)/lib
 
-#
-# Test code
-#
-bin_PROGRAMS = Test_main \
-	Test_simd \
-	Test_gamma  \
-	Test_cshift \
-	Test_cshift_red_black \
-	Test_stencil \
-	Test_nersc_io \
-	Test_rng \
-	Test_remez \
-	Test_rng_fixed \
-	Test_wilson_evenodd \
-	Test_wilson_cg_unprec \
-	Test_wilson_cg_prec \
-	Test_wilson_cg_schur \
-	Test_dwf_even_odd\
-	Test_dwf_cg_unprec\
-	Test_dwf_cg_prec\
-	Test_dwf_cg_schur\
-	Test_many_evenodd\
-	Test_many_cg
-
-
-
-Test_main_SOURCES = Test_main.cc
-Test_main_LDADD = -lGrid
-
-Test_rng_SOURCES = Test_rng.cc
-Test_rng_LDADD = -lGrid
-
-Test_rng_fixed_SOURCES = Test_rng_fixed.cc
-Test_rng_fixed_LDADD = -lGrid
-
-Test_remez_SOURCES = Test_remez.cc
-Test_remez_LDADD = -lGrid
-
-Test_nersc_io_SOURCES = Test_nersc_io.cc
-Test_nersc_io_LDADD = -lGrid
-
-Test_cshift_SOURCES = Test_cshift.cc
-Test_cshift_LDADD = -lGrid
-
-Test_cshift_red_black_SOURCES = Test_cshift_red_black.cc
-Test_cshift_red_black_LDADD = -lGrid
-
-Test_gamma_SOURCES = Test_gamma.cc
-Test_gamma_LDADD = -lGrid
-
-Test_stencil_SOURCES = Test_stencil.cc
-Test_stencil_LDADD = -lGrid
-
-Test_simd_SOURCES = Test_simd.cc
-Test_simd_LDADD = -lGrid
-
-#Test_simd_new_SOURCES = Test_simd_new.cc
-#Test_simd_new_LDADD = -lGrid
-
-Test_wilson_evenodd_SOURCES = Test_wilson_evenodd.cc
-Test_wilson_evenodd_LDADD = -lGrid
-
-Test_wilson_cg_unprec_SOURCES = Test_wilson_cg_unprec.cc
-Test_wilson_cg_unprec_LDADD = -lGrid
-
-Test_wilson_cg_prec_SOURCES = Test_wilson_cg_prec.cc
-Test_wilson_cg_prec_LDADD = -lGrid
-
-Test_wilson_cg_schur_SOURCES = Test_wilson_cg_schur.cc
-Test_wilson_cg_schur_LDADD = -lGrid
-
-Test_dwf_even_odd_SOURCES = Test_dwf_even_odd.cc
-Test_dwf_even_odd_LDADD = -lGrid
-
-Test_dwf_cg_unprec_SOURCES = Test_dwf_cg_unprec.cc
-Test_dwf_cg_unprec_LDADD = -lGrid
-
-Test_dwf_cg_prec_SOURCES = Test_dwf_cg_prec.cc
-Test_dwf_cg_prec_LDADD = -lGrid
-
-Test_dwf_cg_schur_SOURCES = Test_dwf_cg_schur.cc
-Test_dwf_cg_schur_LDADD = -lGrid
-
-Test_many_evenodd_SOURCES = Test_many_evenodd.cc
-Test_many_evenodd_LDADD = -lGrid
-
-Test_many_cg_SOURCES = Test_many_cg.cc
-Test_many_cg_LDADD = -lGrid
+include Make.inc
diff --git a/tests/Test_simd_new.cc b/tests/Test_simd_new.cc
deleted file mode 100644
index 41781304..00000000
--- a/tests/Test_simd_new.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-#include <Grid.h>
-#include "simd/Grid_vector_types.h"
-#include <parallelIO/GridNerscIO.h>
-
-using namespace std;
-using namespace Grid;
-using namespace Grid::QCD;
-
-class funcPlus {
-public:
-  funcPlus() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
-  std::string name(void) const { return std::string("Plus"); }
-};
-class funcMinus {
-public:
-  funcMinus() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
-  std::string name(void) const { return std::string("Minus"); }
-};
-class funcTimes {
-public:
-  funcTimes() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
-  std::string name(void) const { return std::string("Times"); }
-};
-class funcConj {
-public:
-  funcConj() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);}
-  std::string name(void) const { return std::string("Conj"); }
-};
-class funcAdj {
-public:
-  funcAdj() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
-  std::string name(void) const { return std::string("Adj"); }
-};
-
-class funcTimesI {
-public:
-  funcTimesI() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
-  std::string name(void) const { return std::string("timesI"); }
-};
-
-class funcTimesMinusI {
-public:
-  funcTimesMinusI() {};
-  template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
-  std::string name(void) const { return std::string("timesMinusI"); }
-};
-
-template<class scal, class vec,class functor > 
-void Tester(const functor &func)
-{
-  GridSerialRNG          sRNG;
-  sRNG.SeedRandomDevice();
-  
-  int Nsimd = vec::Nsimd();
-
-  std::vector<scal> input1(Nsimd);
-  std::vector<scal> input2(Nsimd);
-  std::vector<scal> result(Nsimd);
-  std::vector<scal> reference(Nsimd);
-
-  std::vector<vec,alignedAllocator<vec> > buf(3);
-  vec & v_input1 = buf[0];
-  vec & v_input2 = buf[1];
-  vec & v_result = buf[2];
-
-
-  for(int i=0;i<Nsimd;i++){
-    random(sRNG,input1[i]);
-    random(sRNG,input2[i]);
-    random(sRNG,result[i]);
-  }
-
-  merge<vec,scal>(v_input1,input1);
-  merge<vec,scal>(v_input2,input2);
-  merge<vec,scal>(v_result,result);
-
-  func(v_result,v_input1,v_input2);
-
-  for(int i=0;i<Nsimd;i++) {
-    func(reference[i],input1[i],input2[i]);
-  }
-
-  extract<vec,scal>(v_result,result);
-  std::cout << " " << func.name()<<std::endl;
-
-  int ok=0;
-  for(int i=0;i<Nsimd;i++){
-    if ( abs(reference[i]-result[i])>0){
-      std::cout<< "*****" << std::endl;
-      std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
-      ok++;
-    }
-  }
-  if ( ok==0 ) std::cout << " OK!" <<std::endl;
-
-}
-
-
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  std::vector<int> latt_size   = GridDefaultLatt();
-  std::vector<int> simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd());
-  std::vector<int> mpi_layout  = GridDefaultMpi();
-    
-  GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-  std::vector<int> seeds({1,2,3,4});
-
-  // Insist that operations on random scalars gives
-  // identical results to on vectors.
-
-  std::cout << "==================================="<<  std::endl;
-  std::cout << "Testing MyComplexF "<<std::endl;
-  std::cout << "==================================="<<  std::endl;
-
-  Tester<ComplexF,MyComplexF>(funcTimesI());
-  Tester<ComplexF,MyComplexF>(funcTimesMinusI());
-  Tester<ComplexF,MyComplexF>(funcPlus());
-  Tester<ComplexF,MyComplexF>(funcMinus());
-  Tester<ComplexF,MyComplexF>(funcTimes());
-  Tester<ComplexF,MyComplexF>(funcConj());
-  Tester<ComplexF,MyComplexF>(funcAdj());
-
-  std::cout << "==================================="<<  std::endl;
-  std::cout << "Testing MyComplexD "<<std::endl;
-  std::cout << "==================================="<<  std::endl;
-
-
-  Tester<ComplexD,MyComplexD>(funcTimesI());
-  Tester<ComplexD,MyComplexD>(funcTimesMinusI());
-  Tester<ComplexD,MyComplexD>(funcPlus());
-  Tester<ComplexD,MyComplexD>(funcMinus());
-  Tester<ComplexD,MyComplexD>(funcTimes());
-  Tester<ComplexD,MyComplexD>(funcConj());
-  Tester<ComplexD,MyComplexD>(funcAdj());
-
-  std::cout << "==================================="<<  std::endl;
-  std::cout << "Testing MyRealF "<<std::endl;
-  std::cout << "==================================="<<  std::endl;
-
-
-  Tester<RealF,MyRealF>(funcPlus());
-  Tester<RealF,MyRealF>(funcMinus());
-  Tester<RealF,MyRealF>(funcTimes());
-  Tester<RealF,MyRealF>(funcAdj());
-
-  std::cout << "==================================="<<  std::endl;
-  std::cout << "Testing MyRealD "<<std::endl;
-  std::cout << "==================================="<<  std::endl;
-
-  Tester<RealD,MyRealD>(funcPlus());
-  Tester<RealD,MyRealD>(funcMinus());
-  Tester<RealD,MyRealD>(funcTimes());
-  Tester<RealD,MyRealD>(funcAdj());
-
-  Grid_finalize();
-}

From 50bd2935278d283b53fe24091aa7bb0abb05ef97 Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Thu, 4 Jun 2015 00:00:45 +0100
Subject: [PATCH 18/20] First pass at continued fraction; solver and even odd
 decomposition tests pass. Have to make ContFrac class virtual and derive end
 non-abstract actions for the particular cases.

---
 lib/qcd/action/fermion/CayleyFermion5D.cc     |  11 +-
 lib/qcd/action/fermion/CayleyFermion5D.h      |   5 +-
 .../fermion/ContinuedFractionFermion5D.cc     |  81 +++++--
 .../fermion/ContinuedFractionFermion5D.h      |  34 +--
 lib/qcd/action/fermion/DomainWallFermion.h    |   3 +-
 lib/qcd/action/fermion/MobiusFermion.h        |   3 +-
 .../action/fermion/MobiusZolotarevFermion.h   |   4 +-
 tests/InvSqrt.gnu                             |   0
 tests/Make.inc                                |  30 ++-
 tests/Sqrt.gnu                                |   2 -
 tests/{Test_many_cg.cc => Test_cayley_cg.cc}  |   0
 ...any_evenodd.cc => Test_cayley_even_odd.cc} |   0
 tests/Test_contfrac_cg.cc                     | 147 ++++++++++++
 tests/Test_contfrac_even_odd.cc               | 218 ++++++++++++++++++
 ...son_evenodd.cc => Test_wilson_even_odd.cc} |   0
 15 files changed, 490 insertions(+), 48 deletions(-)
 delete mode 100644 tests/InvSqrt.gnu
 delete mode 100644 tests/Sqrt.gnu
 rename tests/{Test_many_cg.cc => Test_cayley_cg.cc} (100%)
 rename tests/{Test_many_evenodd.cc => Test_cayley_even_odd.cc} (100%)
 create mode 100644 tests/Test_contfrac_cg.cc
 create mode 100644 tests/Test_contfrac_even_odd.cc
 rename tests/{Test_wilson_evenodd.cc => Test_wilson_even_odd.cc} (100%)

diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc
index be528e79..e47ff331 100644
--- a/lib/qcd/action/fermion/CayleyFermion5D.cc
+++ b/lib/qcd/action/fermion/CayleyFermion5D.cc
@@ -229,7 +229,14 @@ namespace QCD {
     }
   }
   
-  void CayleyFermion5D::SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c)
+  // Tanh case: forwards to SetCoefficientsZolotarev with zolo_hi fixed to 1.0.
+  void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
+  {
+    SetCoefficientsZolotarev(1.0,zdata,b,c);
+
+  }
+  //Zolo
+  void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
   {
 
     ///////////////////////////////////////////////////////////
@@ -266,7 +273,7 @@ namespace QCD {
     double bmc = b-c;
     for(int i=0; i < Ls; i++){
       as[i] = 1.0;
-      omega[i] = ((double)zdata->gamma[i]); //NB reciprocal relative to Chroma NEF code
+      omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
       bs[i] = 0.5*(bpc/omega[i] + bmc);
       cs[i] = 0.5*(bpc/omega[i] - bmc);
     }
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h
index 57c71992..e2175d77 100644
--- a/lib/qcd/action/fermion/CayleyFermion5D.h
+++ b/lib/qcd/action/fermion/CayleyFermion5D.h
@@ -20,7 +20,7 @@ namespace Grid {
       virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
       virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
       virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
-
+      virtual void   Instantiatable(void)=0;
       //    protected:
       RealD mass;
 
@@ -52,7 +52,8 @@ namespace Grid {
 		      RealD _mass,RealD _M5);
 
     protected:
-      void SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c);
+      void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
+      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
     };
 
   }
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
index c281b486..250e365f 100644
--- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
@@ -1,9 +1,56 @@
 #include <Grid.h>
 
 namespace Grid {
-
   namespace QCD {
 
+    void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
+    {
+      SetCoefficientsZolotarev(1.0,zdata,b,c); // Tanh case: same setup with zolo_hi fixed to 1.0
+    }
+    void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
+    { // Fills Beta/cc/cc_d/sqrt_cc and the Aee/See tables from zdata->beta; b and c are not read in this body.
+      R=(1+this->mass)/(1-this->mass); // mass-dependent term, added to the last Aee entry below
+
+      Beta.resize(Ls);
+      cc.resize(Ls);
+      cc_d.resize(Ls);
+      sqrt_cc.resize(Ls);
+      for(int i=0; i < Ls ; i++){
+	Beta[i] = zdata -> beta[i];
+	cc[i] = 1.0/Beta[i];
+	cc_d[i]=sqrt(cc[i]);
+      }
+      // Boundary overrides: the last cc_d is forced to 1, and sqrt_cc[Ls-2] is recomputed from cc[Ls-2] alone.
+      cc_d[Ls-1]=1.0;
+      for(int i=0; i < Ls-1 ; i++){
+	sqrt_cc[i]= sqrt(cc[i]*cc[i+1]);
+      }
+      sqrt_cc[Ls-2]=sqrt(cc[Ls-2]);
+      // NOTE(review): the Ls-2 index assumes Ls>=2; the constructor only asserts Ls is odd - confirm Ls==1 is never used.
+      // Diagonal term: (4-M5) scaled by 1/zolo_hi.
+      ZoloHiInv =1.0/zolo_hi;
+      double dw_diag = (4.0-M5)*ZoloHiInv;
+      // Aee: Beta[s]*dw_diag with a sign that alternates in s; R is added to the last entry.
+      See.resize(Ls);
+      Aee.resize(Ls);
+      int sign=1;
+      for(int s=0;s<Ls;s++){
+	Aee[s] = sign * Beta[s] * dw_diag;
+	sign   = - sign;
+      }
+      Aee[Ls-1] += R;
+      // See: recurrence See[s] = Aee[s] - 1/See[s-1]; MooeeInv divides by these values.
+      See[0] = Aee[0];
+      for(int s=1;s<Ls;s++){
+	See[s] = Aee[s] - 1.0/See[s-1];
+      }
+      for(int s=0;s<Ls;s++){ // diagnostic dump of the coefficient tables to stdout
+	std::cout <<"s = "<<s<<" Beta "<<Beta[s]<<" Aee "<<Aee[s] <<" See "<<See[s] <<std::endl;
+      }
+    }
+
+
+
     RealD  ContinuedFractionFermion5D::M           (const LatticeFermion &psi, LatticeFermion &chi)
     {
       LatticeFermion D(psi._grid);
@@ -13,13 +60,13 @@ namespace Grid {
       int sign=1;
       for(int s=0;s<Ls;s++){
 	if ( s==0 ) {
-	  ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*scale,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
+	  ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*ZoloHiInv,D,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
 	} else if ( s==(Ls-1) ){
 	  RealD R=(1.0+mass)/(1.0-mass);
-	  ag5xpby_ssp(chi,Beta[s]*scale,D,sqrt_cc[s-1],psi,s,s-1);
+	  ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,D,sqrt_cc[s-1],psi,s,s-1);
 	  ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
 	} else {
-	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*scale,D,sqrt_cc[s],psi,s,s+1);
+	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,D,sqrt_cc[s],psi,s,s+1);
   	  axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1);
 	}
 	sign=-sign; 
@@ -35,18 +82,22 @@ namespace Grid {
     }
     void   ContinuedFractionFermion5D::Meooe       (const LatticeFermion &psi, LatticeFermion &chi)
     {
-      Dhop(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
+      // Apply 4d dslash
+      if ( psi.checkerboard == Odd ) {
+	DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
+      } else {
+	DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
+      }
       
       int sign=1;
       for(int s=0;s<Ls;s++){
 	if ( s==(Ls-1) ){
-	  ag5xpby_ssp(chi,Beta[s]*scale,chi,0.0,chi,s,s);
+	  ag5xpby_ssp(chi,Beta[s]*ZoloHiInv,chi,0.0,chi,s,s);
 	} else {
-	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*scale,chi,0.0,chi,s,s);
+	  ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*ZoloHiInv,chi,0.0,chi,s,s);
 	}
 	sign=-sign; 
-    }
-
+      }
     }
     void   ContinuedFractionFermion5D::MeooeDag    (const LatticeFermion &psi, LatticeFermion &chi)
     {
@@ -54,7 +105,7 @@ namespace Grid {
     }
     void   ContinuedFractionFermion5D::Mooee       (const LatticeFermion &psi, LatticeFermion &chi)
     {
-      double dw_diag = (4.0-this->M5)*scale;
+      double dw_diag = (4.0-M5)*ZoloHiInv;
     
       int sign=1;
       for(int s=0;s<Ls;s++){
@@ -62,7 +113,7 @@ namespace Grid {
 	  ag5xpby_ssp(chi,cc[0]*Beta[0]*sign*dw_diag,psi,sqrt_cc[0],psi,s,s+1); // Multiplies Dw by G5 so Hw
 	} else if ( s==(Ls-1) ){
 	  // Drop the CC here.
-	  double R=(1+this->mass)/(1-this->mass);
+	  double R=(1+mass)/(1-mass);
 	  ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1);
 	  ag5xpby_ssp(chi,R,psi,1.0,chi,s,s);
 	} else {
@@ -80,7 +131,7 @@ namespace Grid {
     void   ContinuedFractionFermion5D::MooeeInv    (const LatticeFermion &psi, LatticeFermion &chi)
     {
       // Apply Linv
-      axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0);
+      axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); 
       for(int s=1;s<Ls;s++){
 	axpbg5y_ssp(chi,1.0/cc_d[s],psi,-1.0/See[s-1],chi,s,s-1);
       }
@@ -89,7 +140,7 @@ namespace Grid {
 	ag5xpby_ssp(chi,1.0/See[s],chi,0.0,chi,s,s); //only appearance of See[0]
       }
       // Apply Uinv = (Linv)^T
-      axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,this->Ls-1,this->Ls-1);
+      axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,Ls-1,Ls-1);
       for(int s=Ls-2;s>=0;s--){
 	axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
       }
@@ -112,6 +163,10 @@ namespace Grid {
 		      FourDimGrid, FourDimRedBlackGrid,M5),
       mass(_mass)
     {
+      assert((Ls&0x1)==1); // Odd Ls required
+      int nrational=Ls-1;// Even rational order
+      zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham
+      SetCoefficientsTanh(zdata,1.0,0.0);
     }
 
   }
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
index 7f5c022a..99365009 100644
--- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
@@ -21,20 +21,8 @@ namespace Grid {
       virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
       virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
 
-    private:
-
-      Approx::zolotarev_data *zdata;
-
-      // Cont frac
-      RealD mass;
-      RealD R;
-      RealD scale;
-      std::vector<double> Beta;
-      std::vector<double> cc;;
-      std::vector<double> cc_d;;
-      std::vector<double> sqrt_cc;
-      std::vector<double> See;
-      std::vector<double> Aee;
+      //      virtual void   Instantiatable(void)=0;
+      virtual void   Instantiatable(void) {};
 
       // Constructors
       ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
@@ -44,6 +32,24 @@ namespace Grid {
 				 GridRedBlackCartesian &FourDimRedBlackGrid,
 				 RealD _mass,RealD M5);
 
+    protected:
+
+      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
+      void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c);
+
+      Approx::zolotarev_data *zdata;
+
+      // Cont frac
+      RealD mass;
+      RealD R;
+      RealD ZoloHiInv;
+      std::vector<double> Beta;
+      std::vector<double> cc;;
+      std::vector<double> cc_d;;
+      std::vector<double> sqrt_cc;
+      std::vector<double> See;
+      std::vector<double> Aee;
+
     };
 
 
diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h
index 3e6a9739..a25c0c3c 100644
--- a/lib/qcd/action/fermion/DomainWallFermion.h
+++ b/lib/qcd/action/fermion/DomainWallFermion.h
@@ -11,6 +11,7 @@ namespace Grid {
     {
     public:
 
+      virtual void   Instantiatable(void) {};
       // Constructors
       DomainWallFermion(LatticeGaugeField &_Umu,
 			GridCartesian         &FiveDimGrid,
@@ -33,7 +34,7 @@ namespace Grid {
 	
 	std::cout << "DomainWallFermion with Ls="<<Ls<<std::endl;
 	// Call base setter
-	this->CayleyFermion5D::SetCoefficients(1.0,zdata,1.0,0.0);
+	this->CayleyFermion5D::SetCoefficientsTanh(zdata,1.0,0.0);
  
       }
 
diff --git a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h
index 4c291fad..33f94089 100644
--- a/lib/qcd/action/fermion/MobiusFermion.h
+++ b/lib/qcd/action/fermion/MobiusFermion.h
@@ -11,6 +11,7 @@ namespace Grid {
     {
     public:
 
+      virtual void   Instantiatable(void) {};
       // Constructors
       MobiusFermion(LatticeGaugeField &_Umu,
 		    GridCartesian         &FiveDimGrid,
@@ -34,7 +35,7 @@ namespace Grid {
 	assert(zdata->n==this->Ls);
 	
 	// Call base setter
-	this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c);
+	this->CayleyFermion5D::SetCoefficientsTanh(zdata,b,c);
  
       }
 
diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
index 9ac795d9..1be61601 100644
--- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h
+++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
@@ -11,6 +11,7 @@ namespace Grid {
     {
     public:
 
+      virtual void   Instantiatable(void) {};
       // Constructors
        MobiusZolotarevFermion(LatticeGaugeField &_Umu,
 			      GridCartesian         &FiveDimGrid,
@@ -34,10 +35,9 @@ namespace Grid {
 	assert(zdata->n==this->Ls);
 
 	std::cout << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
-	std::cout << "MobiusZolotarevFermion : note there is a degeneracy between (b+c) and Zolo param hi"<<std::endl;
 	
 	// Call base setter
-	this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c);
+	this->CayleyFermion5D::SetCoefficientsZolotarev(hi,zdata,b,c);
  
       }
 
diff --git a/tests/InvSqrt.gnu b/tests/InvSqrt.gnu
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/Make.inc b/tests/Make.inc
index d592f218..b525874d 100644
--- a/tests/Make.inc
+++ b/tests/Make.inc
@@ -1,5 +1,21 @@
 
-bin_PROGRAMS = Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_many_cg Test_many_evenodd Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_evenodd
+bin_PROGRAMS = Test_cayley_cg Test_cayley_even_odd Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_even_odd
+
+
+Test_cayley_cg_SOURCES=Test_cayley_cg.cc
+Test_cayley_cg_LDADD=-lGrid
+
+
+Test_cayley_even_odd_SOURCES=Test_cayley_even_odd.cc
+Test_cayley_even_odd_LDADD=-lGrid
+
+
+Test_contfrac_cg_SOURCES=Test_contfrac_cg.cc
+Test_contfrac_cg_LDADD=-lGrid
+
+
+Test_contfrac_even_odd_SOURCES=Test_contfrac_even_odd.cc
+Test_contfrac_even_odd_LDADD=-lGrid
 
 
 Test_cshift_SOURCES=Test_cshift.cc
@@ -34,14 +50,6 @@ Test_main_SOURCES=Test_main.cc
 Test_main_LDADD=-lGrid
 
 
-Test_many_cg_SOURCES=Test_many_cg.cc
-Test_many_cg_LDADD=-lGrid
-
-
-Test_many_evenodd_SOURCES=Test_many_evenodd.cc
-Test_many_evenodd_LDADD=-lGrid
-
-
 Test_nersc_io_SOURCES=Test_nersc_io.cc
 Test_nersc_io_LDADD=-lGrid
 
@@ -78,6 +86,6 @@ Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc
 Test_wilson_cg_unprec_LDADD=-lGrid
 
 
-Test_wilson_evenodd_SOURCES=Test_wilson_evenodd.cc
-Test_wilson_evenodd_LDADD=-lGrid
+Test_wilson_even_odd_SOURCES=Test_wilson_even_odd.cc
+Test_wilson_even_odd_LDADD=-lGrid
 
diff --git a/tests/Sqrt.gnu b/tests/Sqrt.gnu
deleted file mode 100644
index ae56ab97..00000000
--- a/tests/Sqrt.gnu
+++ /dev/null
@@ -1,2 +0,0 @@
-f(x) = 6.81384+(-2.34645e-06/(x+0.000228091))+(-1.51593e-05/(x+0.00112084))+(-6.89254e-05/(x+0.003496))+(-0.000288983/(x+0.00954309))+(-0.00119277/(x+0.024928))+(-0.0050183/(x+0.0646627))+(-0.0226449/(x+0.171576))+(-0.123767/(x+0.491792))+(-1.1705/(x+1.78667))+(-102.992/(x+18.4866));
-f(x) = 0.14676+(0.00952992/(x+5.40933e-05))+(0.0115952/(x+0.000559699))+(0.0161824/(x+0.00203338))+(0.0243252/(x+0.00582831))+(0.0379533/(x+0.0154649))+(0.060699/(x+0.0401156))+(0.100345/(x+0.104788))+(0.178335/(x+0.286042))+(0.381586/(x+0.892189))+(1.42625/(x+4.38422));
diff --git a/tests/Test_many_cg.cc b/tests/Test_cayley_cg.cc
similarity index 100%
rename from tests/Test_many_cg.cc
rename to tests/Test_cayley_cg.cc
diff --git a/tests/Test_many_evenodd.cc b/tests/Test_cayley_even_odd.cc
similarity index 100%
rename from tests/Test_many_evenodd.cc
rename to tests/Test_cayley_even_odd.cc
diff --git a/tests/Test_contfrac_cg.cc b/tests/Test_contfrac_cg.cc
new file mode 100644
index 00000000..7fa0d6fc
--- /dev/null
+++ b/tests/Test_contfrac_cg.cc
@@ -0,0 +1,147 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal { // single-member wrapper template; not referenced anywhere else in Test_contfrac_cg.cc
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = { // file-scope table of the X,Y,Z,T gamma enumerators; not referenced elsewhere in this file
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+
+template<class What> // forward declaration; the definition follows main() and calls the three tests declared below
+void  TestCGinversions(What & Ddwf, 
+		       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		       RealD mass, RealD M5,
+		       GridParallelRNG *RNG4,
+		       GridParallelRNG *RNG5);
+template<class What> // forward declaration; same parameter list as TestCGinversions
+void  TestCGschur(What & Ddwf, 
+		  GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		  GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		  RealD mass, RealD M5,
+		  GridParallelRNG *RNG4,
+		  GridParallelRNG *RNG5);
+
+template<class What> // forward declaration; same parameter list as TestCGinversions
+void  TestCGunprec(What & Ddwf, 
+		   GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		   GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		   RealD mass, RealD M5,
+		   GridParallelRNG *RNG4,
+		   GridParallelRNG *RNG5);
+
+template<class What> // forward declaration; same parameter list as TestCGinversions
+void  TestCGprec(What & Ddwf, 
+		 GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		 GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		 RealD mass, RealD M5,
+		 GridParallelRNG *RNG4,
+		 GridParallelRNG *RNG5);
+
+int main (int argc, char ** argv) // builds 4d/5d grids, RNGs and a random gauge field, then runs the CG tests on one operator
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  const int Ls=9; // extent handed to the five-dimensional grid factories below; odd, and the continued-fraction operator code asserts an odd Ls
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); // NOTE(review): none of the four grid pointers is released in this function
+
+
+  std::vector<int> seeds4({1,2,3,4}); // fixed integer seeds for RNG4 (presumably for reproducible runs; confirm)
+  std::vector<int> seeds5({5,6,7,8}); // fixed integer seeds for RNG5
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+  std::vector<LatticeColourMatrix> U(4,UGrid); // constructed but not used again in this function
+
+  RealD mass=0.1;
+  RealD M5  =1.8;
+  std::cout <<"ContinuedFractionFermion test"<<std::endl;
+  ContinuedFractionFermion5D Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  TestCGinversions<ContinuedFractionFermion5D>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  Grid_finalize();
+}
+template<class What> // runs the three solver tests in sequence on the same operator, printing a banner before each
+void  TestCGinversions(What & Ddwf, 
+		       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		       RealD mass, RealD M5,
+		       GridParallelRNG *RNG4,
+		       GridParallelRNG *RNG5)
+{
+  std::cout << "Testing unpreconditioned inverter"<<std::endl;
+  TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); // all arguments are forwarded unchanged
+  std::cout << "Testing red black preconditioned inverter"<<std::endl;
+  TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
+  std::cout << "Testing red black Schur inverter"<<std::endl;
+  TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
+}
+
+template<class What> // CG on the full (non-checkerboarded) 5d grid; FrbGrid, UGrid, UrbGrid, mass, M5 and RNG4 are accepted but unused here
+void  TestCGunprec(What & Ddwf, 
+		   GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		   GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		   RealD mass, RealD M5,
+		   GridParallelRNG *RNG4,
+		   GridParallelRNG *RNG5)
+{
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion result(FGrid); result=zero; // zero initial value for the solution field
+
+  HermitianOperator<What,LatticeFermion> HermOp(Ddwf); // wraps Ddwf in the operator type handed to CG below
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); // presumably (tolerance, max iterations); confirm against ConjugateGradient
+  CG(HermOp,src,result); // result is not inspected afterwards; no pass/fail check is made here
+
+}
+template<class What> // CG on the odd checkerboard only; UGrid, UrbGrid, mass, M5 and RNG4 are accepted but unused here
+void  TestCGprec(What & Ddwf, 
+		 GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		 GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		 RealD mass, RealD M5,
+		 GridParallelRNG *RNG4,
+		 GridParallelRNG *RNG5)
+{
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion    src_o(FrbGrid);
+  LatticeFermion result_o(FrbGrid);
+  pickCheckerboard(Odd,src_o,src); // source for the solve is the Odd part of the random full-grid field
+  result_o=zero;
+
+  HermitianCheckerBoardedOperator<What,LatticeFermion> HermOpEO(Ddwf); // checkerboarded wrapper handed to CG below
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); // presumably (tolerance, max iterations); confirm against ConjugateGradient
+  CG(HermOpEO,src_o,result_o); // result_o is not inspected afterwards
+}
+
+
+template<class What> // full-grid solve through SchurRedBlackSolve; FrbGrid, UGrid, UrbGrid, mass, M5 and RNG4 are accepted but unused here
+void  TestCGschur(What & Ddwf, 
+		   GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+		   GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+		   RealD mass, RealD M5,
+		   GridParallelRNG *RNG4,
+		   GridParallelRNG *RNG5)
+{
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion result(FGrid); result=zero;
+
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); // presumably (tolerance, max iterations); confirm against ConjugateGradient
+  SchurRedBlackSolve<LatticeFermion> SchurSolver(CG); // the CG object is handed to the Schur solver
+  SchurSolver(Ddwf,src,result); // operator is passed directly, without a Hermitian wrapper; result is not inspected
+}
diff --git a/tests/Test_contfrac_even_odd.cc b/tests/Test_contfrac_even_odd.cc
new file mode 100644
index 00000000..801bd955
--- /dev/null
+++ b/tests/Test_contfrac_even_odd.cc
@@ -0,0 +1,218 @@
+#include <Grid.h>
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template<class d>
+struct scal { // single-member wrapper template; not referenced anywhere else in Test_contfrac_even_odd.cc
+  d internal;
+};
+
+  Gamma::GammaMatrix Gmu [] = { // file-scope table of the X,Y,Z,T gamma enumerators; not referenced elsewhere in this file
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
+  };
+
+
+template<class What> // forward declaration; the definition follows main()
+void  TestWhat(What & Ddwf,
+	       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+	       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+	       RealD mass, RealD M5,
+	       GridParallelRNG *RNG4,   GridParallelRNG *RNG5);
+
+int main (int argc, char ** argv) // builds 4d/5d grids, RNGs and a random gauge field, then runs TestWhat on one operator
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  const int Ls=9; // extent handed to the five-dimensional grid factories below; odd, and the continued-fraction operator code asserts an odd Ls
+  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); // NOTE(review): none of the four grid pointers is released in this function
+
+
+  std::vector<int> seeds4({1,2,3,4}); // fixed integer seeds for RNG4 (presumably for reproducible runs; confirm)
+  std::vector<int> seeds5({5,6,7,8}); // fixed integer seeds for RNG5
+  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
+  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
+
+  LatticeGaugeField Umu(UGrid); random(RNG4,Umu);
+  std::vector<LatticeColourMatrix> U(4,UGrid); // constructed but not used again in this function
+
+  RealD mass=0.1;
+  RealD M5  =1.8;
+  std::cout <<"ContinuedFractionFermion test"<<std::endl;
+  ContinuedFractionFermion5D Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  TestWhat<ContinuedFractionFermion5D>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  Grid_finalize();
+}
+
+template<class What> // even/odd consistency checks for one 5d operator; every result is printed to std::cout, nothing is asserted
+void  TestWhat(What & Ddwf, 
+	       GridCartesian         * FGrid,	       GridRedBlackCartesian * FrbGrid,
+	       GridCartesian         * UGrid,	       GridRedBlackCartesian * UrbGrid,
+	       RealD mass, RealD M5,
+	       GridParallelRNG *RNG4,
+	       GridParallelRNG *RNG5)
+{
+
+  LatticeFermion src   (FGrid); random(*RNG5,src);
+  LatticeFermion phi   (FGrid); random(*RNG5,phi);
+  LatticeFermion chi   (FGrid); random(*RNG5,chi);
+  LatticeFermion result(FGrid); result=zero;
+  LatticeFermion    ref(FGrid);    ref=zero;
+  LatticeFermion    tmp(FGrid);    tmp=zero;
+  LatticeFermion    err(FGrid);    err=zero;
+
+  LatticeFermion src_e (FrbGrid);
+  LatticeFermion src_o (FrbGrid);
+  LatticeFermion r_e   (FrbGrid);
+  LatticeFermion r_o   (FrbGrid);
+  LatticeFermion r_eo  (FGrid);
+  LatticeFermion r_eeoo(FGrid);
+
+  std::cout<<"=========================================================="<<std::endl;
+  std::cout<<"= Testing that Meo + Moe + Moo + Mee = Munprec "<<std::endl;
+  std::cout<<"=========================================================="<<std::endl;
+
+  pickCheckerboard(Even,src_e,src);
+  pickCheckerboard( Odd,src_o,src);
+
+  Ddwf.Meooe(src_e,r_o);  std::cout<<"Applied Meo "<<norm2(r_o)<<std::endl;
+  Ddwf.Meooe(src_o,r_e);  std::cout<<"Applied Moe "<<norm2(r_e)<<std::endl;
+  setCheckerboard(r_eo,r_o);
+  setCheckerboard(r_eo,r_e);
+
+  Ddwf.Mooee(src_e,r_e);  std::cout<<"Applied Mee"<<norm2(r_e)<<std::endl;
+  Ddwf.Mooee(src_o,r_o);  std::cout<<"Applied Moo"<<norm2(r_o)<<std::endl;
+  setCheckerboard(r_eeoo,r_e);
+  setCheckerboard(r_eeoo,r_o);
+
+  r_eo=r_eo+r_eeoo; // off-diagonal (Meooe) plus diagonal (Mooee) pieces, compared with M(src) below
+  Ddwf.M(src,ref);  
+
+  //  std::cout << r_eo<<std::endl;
+  //  std::cout << ref <<std::endl;
+
+  err= ref - r_eo;
+  std::cout << "EO norm diff   "<< norm2(err)<< " "<<norm2(ref)<< " " << norm2(r_eo) <<std::endl;
+    
+  LatticeComplex cerr(FGrid);
+  cerr = localInnerProduct(err,err); // only consumed by the commented-out print below
+  //  std::cout << cerr<<std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test Ddagger is the dagger of D by requiring                "<<std::endl;
+  std::cout<<"=  < phi | Deo | chi > * = < chi | Deo^dag| phi>  "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  
+  LatticeFermion chi_e   (FrbGrid);
+  LatticeFermion chi_o   (FrbGrid);
+
+  LatticeFermion dchi_e  (FrbGrid);
+  LatticeFermion dchi_o  (FrbGrid);
+
+  LatticeFermion phi_e   (FrbGrid);
+  LatticeFermion phi_o   (FrbGrid);
+
+  LatticeFermion dphi_e  (FrbGrid);
+  LatticeFermion dphi_o  (FrbGrid);
+
+
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+  pickCheckerboard(Even,phi_e,phi);
+  pickCheckerboard(Odd ,phi_o,phi);
+
+  Ddwf.Meooe(chi_e,dchi_o); // Meooe output lands on the opposite parity of its input
+  Ddwf.Meooe(chi_o,dchi_e);
+  Ddwf.MeooeDag(phi_e,dphi_o);
+  Ddwf.MeooeDag(phi_o,dphi_e);
+
+  ComplexD pDce = innerProduct(phi_e,dchi_e);
+  ComplexD pDco = innerProduct(phi_o,dchi_o);
+  ComplexD cDpe = innerProduct(chi_e,dphi_e);
+  ComplexD cDpo = innerProduct(chi_o,dphi_o);
+
+  std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
+  std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
+
+  std::cout <<"pDce - conj(cDpo) "<< pDce-conj(cDpo) <<std::endl; // cross-parity pairing, because Meooe swaps parity
+  std::cout <<"pDco - conj(cDpe) "<< pDco-conj(cDpe) <<std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MeeInv Mee = 1                                         "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+
+  Ddwf.Mooee(chi_e,src_e);
+  Ddwf.MooeeInv(src_e,phi_e);
+
+  Ddwf.Mooee(chi_o,src_o);
+  Ddwf.MooeeInv(src_o,phi_o);
+  
+  setCheckerboard(phi,phi_e);
+  setCheckerboard(phi,phi_o);
+
+  err = phi-chi;
+  std::cout << "norm diff   "<< norm2(err)<< std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MeeInvDag MeeDag = 1                                   "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+
+  Ddwf.MooeeDag(chi_e,src_e);
+  Ddwf.MooeeInvDag(src_e,phi_e);
+
+  Ddwf.MooeeDag(chi_o,src_o);
+  Ddwf.MooeeInvDag(src_o,phi_o);
+  
+  setCheckerboard(phi,phi_e);
+  setCheckerboard(phi,phi_o);
+
+  err = phi-chi;
+  std::cout << "norm diff   "<< norm2(err)<< std::endl;
+
+  std::cout<<"=============================================================="<<std::endl;
+  std::cout<<"= Test MpcDagMpc is Hermitian              "<<std::endl;
+  std::cout<<"=============================================================="<<std::endl;
+  
+  random(*RNG5,phi);
+  random(*RNG5,chi);
+  pickCheckerboard(Even,chi_e,chi);
+  pickCheckerboard(Odd ,chi_o,chi);
+  pickCheckerboard(Even,phi_e,phi);
+  pickCheckerboard(Odd ,phi_o,phi);
+  RealD t1,t2; // passed to MpcDagMpc; never read afterwards in this function
+
+  Ddwf.MpcDagMpc(chi_e,dchi_e,t1,t2); // here input and output share a parity, unlike the Meooe test above
+  Ddwf.MpcDagMpc(chi_o,dchi_o,t1,t2);
+
+  Ddwf.MpcDagMpc(phi_e,dphi_e,t1,t2);
+  Ddwf.MpcDagMpc(phi_o,dphi_o,t1,t2);
+
+  pDce = innerProduct(phi_e,dchi_e);
+  pDco = innerProduct(phi_o,dchi_o);
+  cDpe = innerProduct(chi_e,dphi_e);
+  cDpo = innerProduct(chi_o,dphi_o);
+
+  std::cout <<"e "<<pDce<<" "<<cDpe <<std::endl;
+  std::cout <<"o "<<pDco<<" "<<cDpo <<std::endl;
+
+  std::cout <<"pDco - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl; // same-parity pairing; label now names the expression actually printed
+  std::cout <<"pDce - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
+  
+}
diff --git a/tests/Test_wilson_evenodd.cc b/tests/Test_wilson_even_odd.cc
similarity index 100%
rename from tests/Test_wilson_evenodd.cc
rename to tests/Test_wilson_even_odd.cc

From c327019574dfa1e5086594a3660ab235016109cc Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Thu, 4 Jun 2015 00:23:16 +0100
Subject: [PATCH 19/20] Implementing the Hw kernel continued fraction 5d
 overlap cases

---
 lib/qcd/action/Actions.h                      |  2 +
 .../fermion/ContinuedFractionFermion5D.cc     |  9 ++--
 .../fermion/ContinuedFractionFermion5D.h      |  6 +--
 .../OverlapWilsonContfracTanhFermion.h        | 39 ++++++++++++++++
 .../OverlapWilsonContfracZolotarevFermion.h   | 44 +++++++++++++++++++
 5 files changed, 91 insertions(+), 9 deletions(-)
 create mode 100644 lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
 create mode 100644 lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h

diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index 8a8c4642..b31e9136 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -55,6 +55,8 @@
 // Continued fraction
 //////////////////////
 #include <qcd/action/fermion/ContinuedFractionFermion5D.h>
+#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
+#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
 
 //////////////////////
 // Partial fraction
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
index 250e365f..92f6473e 100644
--- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
@@ -3,11 +3,11 @@
 namespace Grid {
   namespace QCD {
 
-    void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
+    void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
     {
-      SetCoefficientsZolotarev(1.0,zdata,b,c);
+      SetCoefficientsZolotarev(1.0/scale,zdata);
     }
-    void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
+    void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
     {
       R=(1+this->mass)/(1-this->mass);
 
@@ -164,9 +164,6 @@ namespace Grid {
       mass(_mass)
     {
       assert((Ls&0x1)==1); // Odd Ls required
-      int nrational=Ls-1;// Even rational order
-      zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham
-      SetCoefficientsTanh(zdata,1.0,0.0);
     }
 
   }
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
index 99365009..f363878f 100644
--- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
@@ -22,7 +22,7 @@ namespace Grid {
       virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
 
       //      virtual void   Instantiatable(void)=0;
-      virtual void   Instantiatable(void) {};
+      virtual void   Instantiatable(void) =0;
 
       // Constructors
       ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
@@ -34,8 +34,8 @@ namespace Grid {
 
     protected:
 
-      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
-      void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c);
+      void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale);
+      void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata); // SetCoefficientsTanh forwards 1.0/scale as zolo_hi
 
       Approx::zolotarev_data *zdata;
 
diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
new file mode 100644
index 00000000..4865f169
--- /dev/null
+++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
@@ -0,0 +1,39 @@
+#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D // concrete subclass: takes grid_higham data and applies it via SetCoefficientsTanh
+    {
+    public:
+
+      virtual void   Instantiatable(void){}; // empty override of the base's pure virtual, which makes this class constructible
+      // Constructors
+    OverlapWilsonContFracTanhFermion(LatticeGaugeField &_Umu,
+				     GridCartesian         &FiveDimGrid,
+				     GridRedBlackCartesian &FiveDimRedBlackGrid,
+				     GridCartesian         &FourDimGrid,
+				     GridRedBlackCartesian &FourDimRedBlackGrid,
+				     RealD _mass,RealD _M5,
+				     RealD scale) :
+      
+      // b+c=scale, b-c = 0 <=> b =c = scale/2
+      ContinuedFractionFermion5D(_Umu,
+		    FiveDimGrid,
+		    FiveDimRedBlackGrid,
+		    FourDimGrid,
+		    FourDimRedBlackGrid,_mass)
+	{
+	  assert((Ls&0x1)==1); // Odd Ls required
+	  int nrational=Ls-1;// Even rational order
+	  zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham
+	  SetCoefficientsTanh(zdata,scale); // SetCoefficientsTanh passes 1.0/scale on to SetCoefficientsZolotarev as zolo_hi
+	}
+    };
+  }
+}
+#endif
diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
new file mode 100644
index 00000000..7478c062
--- /dev/null
+++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
@@ -0,0 +1,44 @@
+#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+
+#include <Grid.h>
+
+namespace Grid {
+
+  namespace QCD {
+
+    class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D // concrete subclass: takes grid_zolotarev data and applies it via SetCoefficientsZolotarev
+    {
+    public:
+
+      virtual void   Instantiatable(void){}; // empty override of the base's pure virtual, which makes this class constructible
+      // Constructors
+    OverlapWilsonContFracZolotarevFermion(LatticeGaugeField &_Umu,
+					  GridCartesian         &FiveDimGrid,
+					  GridRedBlackCartesian &FiveDimRedBlackGrid,
+					  GridCartesian         &FourDimGrid,
+					  GridRedBlackCartesian &FourDimRedBlackGrid,
+					  RealD _mass,RealD _M5,
+					  RealD lo,RealD hi):
+      
+      // Base class is built first; lo and hi are only used in the constructor body
+      ContinuedFractionFermion5D(_Umu,
+				 FiveDimGrid,
+				 FiveDimRedBlackGrid,
+				 FourDimGrid,
+				 FourDimRedBlackGrid,_mass)
+	{
+	  assert((Ls&0x1)==1); // Odd Ls required
+
+	  int nrational=Ls-1;// Even rational order
+	  RealD eps = lo/hi; // ratio handed to grid_zolotarev; hi itself is passed on as zolo_hi below
+
+	  zdata = Approx::grid_zolotarev(eps,nrational,0); // assign the inherited member (as the Tanh sibling does), not a shadowing local
+
+	  SetCoefficientsZolotarev(hi,zdata);
+
+	}
+    };
+  }
+}
+#endif

From 37aa74dfd2c6634a017925e5dbccfd3ecfbefc6f Mon Sep 17 00:00:00 2001
From: Peter Boyle <paboyle@ph.ed.ac.uk>
Date: Thu, 4 Jun 2015 06:02:00 +0100
Subject: [PATCH 20/20] CG Tests work for wilson kernel cont frac zolo and tanh

---
 .../fermion/OverlapWilsonContfracTanhFermion.h       | 12 ++++++------
 .../fermion/OverlapWilsonContfracZolotarevFermion.h  |  6 +++---
 tests/Test_contfrac_cg.cc                            | 12 +++++++++---
 tests/Test_contfrac_even_odd.cc                      | 11 ++++++++---
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
index 4865f169..ed0c24dc 100644
--- a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
@@ -1,5 +1,5 @@
-#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
-#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
+#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
 
 #include <Grid.h>
 
@@ -23,10 +23,10 @@ namespace Grid {
       
       // b+c=scale, b-c = 0 <=> b =c = scale/2
       ContinuedFractionFermion5D(_Umu,
-		    FiveDimGrid,
-		    FiveDimRedBlackGrid,
-		    FourDimGrid,
-		    FourDimRedBlackGrid,_mass)
+				 FiveDimGrid,
+				 FiveDimRedBlackGrid,
+				 FourDimGrid,
+				 FourDimRedBlackGrid,_mass,_M5)
 	{
 	  assert((Ls&0x1)==1); // Odd Ls required
 	  int nrational=Ls-1;// Even rational order
diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
index 7478c062..caf01133 100644
--- a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
@@ -1,5 +1,5 @@
-#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
-#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
+#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
+#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
 
 #include <Grid.h>
 
@@ -26,7 +26,7 @@ namespace Grid {
 				 FiveDimGrid,
 				 FiveDimRedBlackGrid,
 				 FourDimGrid,
-				 FourDimRedBlackGrid,_mass)
+				 FourDimRedBlackGrid,_mass,_M5)
 	{
 	  assert((Ls&0x1)==1); // Odd Ls required
 
diff --git a/tests/Test_contfrac_cg.cc b/tests/Test_contfrac_cg.cc
index 7fa0d6fc..83475254 100644
--- a/tests/Test_contfrac_cg.cc
+++ b/tests/Test_contfrac_cg.cc
@@ -72,9 +72,15 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
   RealD M5  =1.8;
-  std::cout <<"ContinuedFractionFermion test"<<std::endl;
-  ContinuedFractionFermion5D Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  TestCGinversions<ContinuedFractionFermion5D>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+
+  std::cout <<"OverlapWilsonContFracTanhFermion  test"<<std::endl;
+  OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestCGinversions<OverlapWilsonContFracTanhFermion>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonContFracZolotarevFermion  test"<<std::endl;
+  OverlapWilsonContFracZolotarevFermion Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestCGinversions<OverlapWilsonContFracZolotarevFermion>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }
diff --git a/tests/Test_contfrac_even_odd.cc b/tests/Test_contfrac_even_odd.cc
index 801bd955..e13c1189 100644
--- a/tests/Test_contfrac_even_odd.cc
+++ b/tests/Test_contfrac_even_odd.cc
@@ -48,9 +48,14 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
   RealD M5  =1.8;
-  std::cout <<"ContinuedFractionFermion test"<<std::endl;
-  ContinuedFractionFermion5D Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  TestWhat<ContinuedFractionFermion5D>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonContFracTanhFermion  test"<<std::endl;
+  OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestWhat<OverlapWilsonContFracTanhFermion>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+
+  std::cout <<"OverlapWilsonContFracZolotarevFermion  test"<<std::endl;
+  OverlapWilsonContFracZolotarevFermion Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestWhat<OverlapWilsonContFracZolotarevFermion>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }