diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc
index 9d9303ae..27772fdb 100644
--- a/benchmarks/Benchmark_dwf.cc
+++ b/benchmarks/Benchmark_dwf.cc
@@ -79,7 +79,7 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
   RealD M5  =1.8;
-  DomainWallFermion Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
   
   std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
   int ncall=10;
diff --git a/benchmarks/Benchmark_wilson.cc b/benchmarks/Benchmark_wilson.cc
index 7d23dec3..0308df33 100644
--- a/benchmarks/Benchmark_wilson.cc
+++ b/benchmarks/Benchmark_wilson.cc
@@ -87,7 +87,7 @@ int main (int argc, char ** argv)
   }
   ref = -0.5*ref;
   RealD mass=0.1;
-  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
   
   std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
   int ncall=10000;
diff --git a/lib/Init.cc b/lib/Init.cc
index 194fd589..2eeee6b4 100644
--- a/lib/Init.cc
+++ b/lib/Init.cc
@@ -177,8 +177,7 @@ void Grid_init(int *argc,char ***argv)
     Grid_quiesce_nodes();
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
-    WilsonFermion::HandOptDslash=1;
-    WilsonFermion5D::HandOptDslash=1;
+    WilsonFermionStatic::HandOptDslash=1;
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
     LebesgueOrder::UseLebesgueOrder=1;
diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h
index 1227ad74..16920f2b 100644
--- a/lib/qcd/QCD.h
+++ b/lib/qcd/QCD.h
@@ -19,6 +19,7 @@ namespace QCD {
     static const int Nd=4;
     static const int Nhs=2; // half spinor
     static const int Nds=8; // double stored gauge field
+    static const int Ngp=2; // gparity index range
 
     //////////////////////////////////////////////////////////////////////////////
     // QCD iMatrix types
@@ -27,7 +28,16 @@ namespace QCD {
     static const int ColourIndex = 2;
     static const int SpinIndex   = 1;
     static const int LorentzIndex= 0;
-    
+
+    // Useful traits is this a spin index
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+
+    const int SpinorIndex = 2;
+    template<typename T> struct isSpinor {
+      static const bool value = (SpinorIndex==T::TensorLevel);
+    };
+    template <typename T> using IfSpinor    = Invoke<std::enable_if<isSpinor<T>::value,int> > ;
+    template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
 
     // ChrisK very keen to add extra space for Gparity doubling.
     //
@@ -49,6 +59,10 @@ namespace QCD {
     template<typename vtype> using iHalfSpinVector            = iScalar<iVector<iScalar<vtype>, Nhs> >;
     template<typename vtype> using iHalfSpinColourVector      = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
 
+    template<typename vtype> using iGparitySpinColourVector       = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
+    template<typename vtype> using iGparityHalfSpinColourVector   = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
+
+
     // Spin matrix
     typedef iSpinMatrix<Complex  >          SpinMatrix;
     typedef iSpinMatrix<ComplexF >          SpinMatrixF;
diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h
index f40cd554..9b119858 100644
--- a/lib/qcd/action/Actions.h
+++ b/lib/qcd/action/Actions.h
@@ -17,9 +17,6 @@
 ////////////////////////////////////////////
 #include <qcd/action/ActionBase.h>
 
-#include <qcd/action/fermion/FermionOperator.h>
-
-
 ////////////////////////////////////////////
 // Gauge Actions
 ////////////////////////////////////////////
@@ -32,58 +29,118 @@
 #include <qcd/action/fermion/WilsonCompressor.h>     //used by all wilson type fermions
 #include <qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions
 
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Explicit explicit template instantiation is still required in the .cc files
+//
+// - CayleyFermion5D.cc
+// - PartialFractionFermion5D.cc
+// - WilsonFermion5D.cc
+// - WilsonKernelsHand.cc
+// - ContinuedFractionFermion5D.cc
+// - WilsonFermion.cc
+// - WilsonKernels.cc
+//
+// The explicit instantiation is only avoidable if we move this source to headers and end up with include/parse/recompile
+// for EVERY .cc file. This define centralises the list and restores global push of impl cases
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#define FermOpTemplateInstantiate(A) \
+  template class A<WilsonImplF>;		\
+  template class A<WilsonImplD>;
+
 ////////////////////////////////////////////
-// 4D formulations
+// Fermion operators / actions
 ////////////////////////////////////////////
-#include <qcd/action/fermion/WilsonFermion.h>
+#include <qcd/action/fermion/FermionOperator.h>
+
+#include <qcd/action/fermion/WilsonFermion.h>       // 4d wilson like
 //#include <qcd/action/fermion/CloverFermion.h>
 
-////////////////////////////////////////////
-// 5D formulations...
-////////////////////////////////////////////
-
-#include <qcd/action/fermion/WilsonFermion5D.h> // used by all 5d overlap types
-
-//////////
-// Cayley
-//////////
-#include <qcd/action/fermion/CayleyFermion5D.h>
+#include <qcd/action/fermion/WilsonFermion5D.h>     // 5d base used by all 5d overlap types
 
+#include <qcd/action/fermion/CayleyFermion5D.h>     // Cayley types
 #include <qcd/action/fermion/DomainWallFermion.h>
 #include <qcd/action/fermion/DomainWallFermion.h>
-
 #include <qcd/action/fermion/MobiusFermion.h>
 #include <qcd/action/fermion/ScaledShamirFermion.h>
-#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
-
 #include <qcd/action/fermion/MobiusZolotarevFermion.h>
 #include <qcd/action/fermion/ShamirZolotarevFermion.h>
+#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
 #include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
 
-//////////////////////
-// Continued fraction
-//////////////////////
-#include <qcd/action/fermion/ContinuedFractionFermion5D.h>
-#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
-#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
+#include <qcd/action/fermion/ContinuedFractionFermion5D.h>               // Continued fraction
+#include <qcd/action/fermion/OverlapWilsonContFracTanhFermion.h>
+#include <qcd/action/fermion/OverlapWilsonContFracZolotarevFermion.h>
 
-//////////////////////
-// Partial fraction
-//////////////////////
-#include <qcd/action/fermion/PartialFractionFermion5D.h>
+#include <qcd/action/fermion/PartialFractionFermion5D.h>                 // Partial fraction
 #include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
 #include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
 
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// More maintainable to maintain the following typedef list centrally, as more "impl" targets
+// are added, (e.g. extension for gparity, half precision project in comms etc..)
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Cayley 5d
+namespace Grid {
+  namespace QCD {
+
+typedef WilsonFermion<WilsonImplR> WilsonFermionR;
+typedef WilsonFermion<WilsonImplF> WilsonFermionF;
+typedef WilsonFermion<WilsonImplD> WilsonFermionD;
+
+typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
+typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
+typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
+typedef MobiusFermion<WilsonImplR> MobiusFermionR;
+typedef MobiusFermion<WilsonImplF> MobiusFermionF;
+typedef MobiusFermion<WilsonImplD> MobiusFermionD;
+typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
+typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
+typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
+
+typedef MobiusZolotarevFermion<WilsonImplR> MobiusZolotarevFermionR;
+typedef MobiusZolotarevFermion<WilsonImplF> MobiusZolotarevFermionF;
+typedef MobiusZolotarevFermion<WilsonImplD> MobiusZolotarevFermionD;
+typedef ShamirZolotarevFermion<WilsonImplR> ShamirZolotarevFermionR;
+typedef ShamirZolotarevFermion<WilsonImplF> ShamirZolotarevFermionF;
+typedef ShamirZolotarevFermion<WilsonImplD> ShamirZolotarevFermionD;
+
+typedef OverlapWilsonCayleyTanhFermion<WilsonImplR> OverlapWilsonCayleyTanhFermionR;
+typedef OverlapWilsonCayleyTanhFermion<WilsonImplF> OverlapWilsonCayleyTanhFermionF;
+typedef OverlapWilsonCayleyTanhFermion<WilsonImplD> OverlapWilsonCayleyTanhFermionD;
+typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplR> OverlapWilsonCayleyZolotarevFermionR;
+typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplF> OverlapWilsonCayleyZolotarevFermionF;
+typedef OverlapWilsonCayleyZolotarevFermion<WilsonImplD> OverlapWilsonCayleyZolotarevFermionD;
+
+// Continued fraction
+typedef OverlapWilsonContFracTanhFermion<WilsonImplR> OverlapWilsonContFracTanhFermionR;
+typedef OverlapWilsonContFracTanhFermion<WilsonImplF> OverlapWilsonContFracTanhFermionF;
+typedef OverlapWilsonContFracTanhFermion<WilsonImplD> OverlapWilsonContFracTanhFermionD;
+typedef OverlapWilsonContFracZolotarevFermion<WilsonImplR> OverlapWilsonContFracZolotarevFermionR;
+typedef OverlapWilsonContFracZolotarevFermion<WilsonImplF> OverlapWilsonContFracZolotarevFermionF;
+typedef OverlapWilsonContFracZolotarevFermion<WilsonImplD> OverlapWilsonContFracZolotarevFermionD;
+
+// Partial fraction
+typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplR> OverlapWilsonPartialFractionTanhFermionR;
+typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplF> OverlapWilsonPartialFractionTanhFermionF;
+typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplD> OverlapWilsonPartialFractionTanhFermionD;
+
+typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplR> OverlapWilsonPartialFractionZolotarevFermionR;
+typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplF> OverlapWilsonPartialFractionZolotarevFermionF;
+typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonPartialFractionZolotarevFermionD;
+  }}
 ///////////////////////////////////////////////////////////////////////////////
 // G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
 ///////////////////////////////////////////////////////////////////////////////
 #include <qcd/action/fermion/g5HermitianLinop.h>
 
 ////////////////////////////////////////
-// Pseudo fermion combinations
+// Pseudo fermion combinations for HMC
 ////////////////////////////////////////
 #include <qcd/action/pseudofermion/TwoFlavour.h>
 #include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
 
-
 #endif
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc
index c18edb7b..1ec7f930 100644
--- a/lib/qcd/action/fermion/CayleyFermion5D.cc
+++ b/lib/qcd/action/fermion/CayleyFermion5D.cc
@@ -2,13 +2,14 @@
 namespace Grid {
 namespace QCD {
 
- CayleyFermion5D::CayleyFermion5D(LatticeGaugeField &_Umu,
-				  GridCartesian         &FiveDimGrid,
-				  GridRedBlackCartesian &FiveDimRedBlackGrid,
-				  GridCartesian         &FourDimGrid,
-				  GridRedBlackCartesian &FourDimRedBlackGrid,
-				  RealD _mass,RealD _M5) :
-   WilsonFermion5D(_Umu,
+ template<class Impl>
+ CayleyFermion5D<Impl>::CayleyFermion5D(GaugeField &_Umu,
+					GridCartesian         &FiveDimGrid,
+					GridRedBlackCartesian &FiveDimRedBlackGrid,
+					GridCartesian         &FourDimGrid,
+					GridRedBlackCartesian &FourDimRedBlackGrid,
+					RealD _mass,RealD _M5) :
+   WilsonFermion5D<Impl>(_Umu,
 		   FiveDimGrid,
 		   FiveDimRedBlackGrid,
 		   FourDimGrid,
@@ -17,9 +18,11 @@ namespace QCD {
  {
  }
 
-  void CayleyFermion5D::Meooe5D    (const LatticeFermion &psi, LatticeFermion &Din)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::Meooe5D    (const FermionField &psi, FermionField &Din)
   {
     // Assemble Din
+    int Ls=this->Ls;
     for(int s=0;s<Ls;s++){
       if ( s==0 ) {
 	//	Din = bs psi[s] + cs[s] psi[s+1}
@@ -35,8 +38,10 @@ namespace QCD {
       }
     }
   }
-  void CayleyFermion5D::MeooeDag5D    (const LatticeFermion &psi, LatticeFermion &Din)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::MeooeDag5D    (const FermionField &psi, FermionField &Din)
   {
+    int Ls=this->Ls;
     for(int s=0;s<Ls;s++){
       if ( s==0 ) {
 	axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
@@ -52,9 +57,12 @@ namespace QCD {
   }
 
   // override multiply
-  RealD CayleyFermion5D::M    (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  RealD CayleyFermion5D<Impl>::M    (const FermionField &psi, FermionField &chi)
   {
-    LatticeFermion Din(psi._grid);
+    int Ls=this->Ls;
+
+    FermionField Din(psi._grid);
 
     // Assemble Din
     /*
@@ -75,7 +83,7 @@ namespace QCD {
     */
     Meooe5D(psi,Din);
 
-    DW(Din,chi,DaggerNo);
+    this->DW(Din,chi,DaggerNo);
     // ((b D_W + D_w hop terms +1) on s-diag
     axpby(chi,1.0,1.0,chi,psi); 
 
@@ -95,18 +103,20 @@ namespace QCD {
     return norm2(chi);
   }
 
-  RealD CayleyFermion5D::Mdag (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
   {
     // Under adjoint
     //D1+        D1- P-    ->   D1+^dag   P+ D2-^dag
     //D2- P+     D2+            P-D1-^dag D2+dag
 
-    LatticeFermion Din(psi._grid);
+    FermionField Din(psi._grid);
     // Apply Dw
-    DW(psi,Din,DaggerYes); 
+    this->DW(psi,Din,DaggerYes); 
 
     Meooe5D(Din,chi);
 
+    int Ls=this->Ls;
     for(int s=0;s<Ls;s++){
 
       // Collect the terms in DW
@@ -145,13 +155,15 @@ namespace QCD {
   }
 
   // half checkerboard operations
-  void CayleyFermion5D::Meooe       (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi)
   {
-    LatticeFermion tmp(psi._grid);
+    FermionField tmp(psi._grid);
     // Assemble the 5d matrix
     Meooe5D(psi,tmp); 
     std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
 
+    int Ls=this->Ls;
     for(int s=0;s<Ls;s++){
       if ( s==0 ) {
 	//	tmp = bs psi[s] + cs[s] psi[s+1}
@@ -169,26 +181,28 @@ namespace QCD {
     std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
     // Apply 4d dslash
     if ( psi.checkerboard == Odd ) {
-      DhopEO(tmp,chi,DaggerNo);
+      this->DhopEO(tmp,chi,DaggerNo);
     } else {
-      DhopOE(tmp,chi,DaggerNo);
+      this->DhopOE(tmp,chi,DaggerNo);
     }
   }
 
-  void CayleyFermion5D::MeooeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+  template<class Impl>
+  void CayleyFermion5D<Impl>::MeooeDag    (const FermionField &psi, FermionField &chi)
   {
-    LatticeFermion tmp(psi._grid);
+    FermionField tmp(psi._grid);
     // Apply 4d dslash
     if ( psi.checkerboard == Odd ) {
-      DhopEO(psi,tmp,DaggerYes);
+      this->DhopEO(psi,tmp,DaggerYes);
     } else {
-      DhopOE(psi,tmp,DaggerYes);
+      this->DhopOE(psi,tmp,DaggerYes);
     }
 
     Meooe5D(tmp,chi); 
     std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
 
     // Assemble the 5d matrix
+    int Ls=this->Ls;
     for(int s=0;s<Ls;s++){
       if ( s==0 ) {
 	axpby_ssp_pplus(chi,beo[s],tmp,   -ceo[s+1]  ,tmp,s,s+1);
@@ -205,8 +219,10 @@ namespace QCD {
 
   }
 
-  void CayleyFermion5D::Mooee       (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi)
   {
+    int Ls=this->Ls;
     for (int s=0;s<Ls;s++){
       if ( s==0 ) {
 	axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
@@ -221,8 +237,10 @@ namespace QCD {
     }
   }
 
-  void  CayleyFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
-    LatticeFermion tmp(psi._grid);
+ template<class Impl>
+  void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
+    int Ls=this->Ls;
+    FermionField tmp(psi._grid);
     // Assemble the 5d matrix
     for(int s=0;s<Ls;s++){
       if ( s==0 ) {
@@ -239,11 +257,13 @@ namespace QCD {
       }
     }
     // Apply 4d dslash fragment
-    DhopDir(tmp,chi,dir,disp);
+    this->DhopDir(tmp,chi,dir,disp);
   }
 
-  void CayleyFermion5D::MooeeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi)
   {
+    int Ls=this->Ls;
     for (int s=0;s<Ls;s++){
       // Assemble the 5d matrix
       if ( s==0 ) {
@@ -259,8 +279,10 @@ namespace QCD {
     }
   }
 
-  void CayleyFermion5D::MooeeInv    (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi)
   {
+    int Ls=this->Ls;
     // Apply (L^{\prime})^{-1}
     axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
     for (int s=1;s<Ls;s++){
@@ -283,8 +305,10 @@ namespace QCD {
     }
   }
 
-  void CayleyFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
   {
+    int Ls=this->Ls;
     // Apply (U^{\prime})^{-dagger}
     axpby_ssp (chi,1.0,psi,     0.0,psi,0,0);      // chi[0]=psi[0]
     for (int s=1;s<Ls;s++){
@@ -307,58 +331,64 @@ namespace QCD {
   }
 
   // force terms; five routines; default to Dhop on diagonal
-  void CayleyFermion5D::MDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+  template<class Impl>
+  void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion Din(V._grid);
+    FermionField Din(V._grid);
 
     if ( dag == DaggerNo ) {
       //      U d/du [D_w D5] V = U d/du DW D5 V
       Meooe5D(V,Din);
-      DhopDeriv(mat,U,Din,dag);
+      this->DhopDeriv(mat,U,Din,dag);
     } else {
       //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
       Meooe5D(U,Din);
-      DhopDeriv(mat,Din,V,dag);
+      this->DhopDeriv(mat,Din,V,dag);
     }
   };
-  void CayleyFermion5D::MoeDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion Din(V._grid);
+    FermionField Din(V._grid);
 
     if ( dag == DaggerNo ) {
       //      U d/du [D_w D5] V = U d/du DW D5 V
       Meooe5D(V,Din);
-      DhopDerivOE(mat,U,Din,dag);
+      this->DhopDerivOE(mat,U,Din,dag);
     } else {
       //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
       Meooe5D(U,Din);
-      DhopDerivOE(mat,Din,V,dag);
+      this->DhopDerivOE(mat,Din,V,dag);
     }
   };
-  void CayleyFermion5D::MeoDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion Din(V._grid);
+    FermionField Din(V._grid);
 
     if ( dag == DaggerNo ) {
       //      U d/du [D_w D5] V = U d/du DW D5 V
       Meooe5D(V,Din);
-      DhopDerivEO(mat,U,Din,dag);
+      this->DhopDerivEO(mat,U,Din,dag);
     } else {
       //      U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
       Meooe5D(U,Din);
-      DhopDerivEO(mat,Din,V,dag);
+      this->DhopDerivEO(mat,Din,V,dag);
     }
   };
   
   // Tanh
-  void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
   {
     SetCoefficientsZolotarev(1.0,zdata,b,c);
 
   }
   //Zolo
-  void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
+ template<class Impl>
+  void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
   {
+    int Ls=this->Ls;
 
     ///////////////////////////////////////////////////////////
     // The Cayley coeffs (unprec)
@@ -408,8 +438,8 @@ namespace QCD {
     ceo.resize(Ls);
     
     for(int i=0;i<Ls;i++){
-      bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0);
-      cee[i]=as[i]*(1.0-cs[i]*(4.0-M5));
+      bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
+      cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
       beo[i]=as[i]*bs[i];
       ceo[i]=-as[i]*cs[i];
     }
@@ -462,6 +492,8 @@ namespace QCD {
     }
   }
 
+  FermOpTemplateInstantiate(CayleyFermion5D);
+
 }}
 
 
diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h
index a139c32e..c0d2a292 100644
--- a/lib/qcd/action/fermion/CayleyFermion5D.h
+++ b/lib/qcd/action/fermion/CayleyFermion5D.h
@@ -5,33 +5,36 @@ namespace Grid {
 
   namespace QCD {
 
-    class CayleyFermion5D : public WilsonFermion5D
+    template<class Impl>
+    class CayleyFermion5D : public WilsonFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  M    (const FermionField &in, FermionField &out);
+      virtual RealD  Mdag (const FermionField &in, FermionField &out);
 
       // half checkerboard operations
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Meooe       (const FermionField &in, FermionField &out);
+      virtual void   MeooeDag    (const FermionField &in, FermionField &out);
+      virtual void   Mooee       (const FermionField &in, FermionField &out);
+      virtual void   MooeeDag    (const FermionField &in, FermionField &out);
+      virtual void   MooeeInv    (const FermionField &in, FermionField &out);
+      virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
       virtual void   Instantiatable(void)=0;
 
       // force terms; five routines; default to Dhop on diagonal
-      virtual void MDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void MoeDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void MeoDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
+      virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 
       // Efficient support for multigrid coarsening
-      virtual void  Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
+      virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp);
       
-      void   Meooe5D       (const LatticeFermion &in, LatticeFermion &out);
-      void   MeooeDag5D    (const LatticeFermion &in, LatticeFermion &out);
+      void   Meooe5D       (const FermionField &in, FermionField &out);
+      void   MeooeDag5D    (const FermionField &in, FermionField &out);
 
       //    protected:
       RealD mass;
@@ -56,7 +59,7 @@ namespace Grid {
       std::vector<RealD> dee;    
 
       // Constructors
-      CayleyFermion5D(LatticeGaugeField &_Umu,
+      CayleyFermion5D(GaugeField &_Umu,
 		      GridCartesian         &FiveDimGrid,
 		      GridRedBlackCartesian &FiveDimRedBlackGrid,
 		      GridCartesian         &FourDimGrid,
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
index cae73dfc..846e6a1b 100644
--- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc
@@ -3,11 +3,13 @@
 namespace Grid {
   namespace QCD {
 
-    void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
+    template<class Impl>
+    void ContinuedFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale)
     {
       SetCoefficientsZolotarev(1.0/scale,zdata);
     }
-    void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
+    template<class Impl>
+    void ContinuedFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata)
     {
       // How to check Ls matches??
       //      std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
@@ -16,7 +18,7 @@ namespace Grid {
       //      std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
       //      std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
       //      std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
-
+      int Ls = this->Ls;
       assert(zdata->db==Ls);// Beta has Ls coeffs
 
       R=(1+this->mass)/(1-this->mass);
@@ -39,7 +41,7 @@ namespace Grid {
 
 
       ZoloHiInv =1.0/zolo_hi;
-      dw_diag = (4.0-M5)*ZoloHiInv;
+      dw_diag = (4.0-this->M5)*ZoloHiInv;
     
       See.resize(Ls);
       Aee.resize(Ls);
@@ -61,11 +63,14 @@ namespace Grid {
 
 
 
-    RealD  ContinuedFractionFermion5D::M           (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    RealD  ContinuedFractionFermion5D<Impl>::M           (const FermionField &psi, FermionField &chi)
     {
-      LatticeFermion D(psi._grid);
+      int Ls = this->Ls;
 
-      DW(psi,D,DaggerNo); 
+      FermionField D(psi._grid);
+
+      this->DW(psi,D,DaggerNo); 
 
       int sign=1;
       for(int s=0;s<Ls;s++){
@@ -83,15 +88,20 @@ namespace Grid {
       }
       return norm2(chi);
     }
-    RealD  ContinuedFractionFermion5D::Mdag        (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    RealD  ContinuedFractionFermion5D<Impl>::Mdag        (const FermionField &psi, FermionField &chi)
     {
       // This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag
       // The rest of matrix is symmetric.
       // Can ignore "dag"
       return M(psi,chi);
     }
-    void  ContinuedFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
-      DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
+    template<class Impl>
+    void  ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
+      int Ls = this->Ls;
+
+      this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian
+
       int sign=1;
       for(int s=0;s<Ls;s++){
 	if ( s==(Ls-1) ){
@@ -102,13 +112,16 @@ namespace Grid {
 	sign=-sign; 
       }
     }
-    void   ContinuedFractionFermion5D::Meooe       (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    void   ContinuedFractionFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi)
     {
+      int Ls = this->Ls;
+
       // Apply 4d dslash
       if ( psi.checkerboard == Odd ) {
-	DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
+	this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
       } else {
-	DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
+	this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian
       }
       
       int sign=1;
@@ -121,12 +134,16 @@ namespace Grid {
 	sign=-sign; 
       }
     }
-    void   ContinuedFractionFermion5D::MeooeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    void   ContinuedFractionFermion5D<Impl>::MeooeDag    (const FermionField &psi, FermionField &chi)
     {
-      Meooe(psi,chi);
+      this->Meooe(psi,chi);
     }
-    void   ContinuedFractionFermion5D::Mooee       (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    void   ContinuedFractionFermion5D<Impl>::Mooee       (const FermionField &psi, FermionField &chi)
     {
+      int Ls = this->Ls;
+
       int sign=1;
       for(int s=0;s<Ls;s++){
 	if ( s==0 ) {
@@ -144,12 +161,16 @@ namespace Grid {
       }
     }
 
-    void   ContinuedFractionFermion5D::MooeeDag    (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    void   ContinuedFractionFermion5D<Impl>::MooeeDag    (const FermionField &psi, FermionField &chi)
     {
-      Mooee(psi,chi);
+      this->Mooee(psi,chi);
     }
-    void   ContinuedFractionFermion5D::MooeeInv    (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    void   ContinuedFractionFermion5D<Impl>::MooeeInv    (const FermionField &psi, FermionField &chi)
     {
+      int Ls = this->Ls;
+
       // Apply Linv
       axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); 
       for(int s=1;s<Ls;s++){
@@ -165,15 +186,19 @@ namespace Grid {
 	axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1);
       }
     }
-    void   ContinuedFractionFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi)
+    template<class Impl>
+    void   ContinuedFractionFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
     {
-      MooeeInv(psi,chi);
+      this->MooeeInv(psi,chi);
     }
 
   // force terms; five routines; default to Dhop on diagonal
-   void ContinuedFractionFermion5D::MDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+    template<class Impl>
+   void ContinuedFractionFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion D(V._grid);
+    int Ls = this->Ls;
+
+    FermionField D(V._grid);
 
     int sign=1;
     for(int s=0;s<Ls;s++){
@@ -184,11 +209,14 @@ namespace Grid {
       }
       sign=-sign; 
     }
-    DhopDeriv(mat,D,V,DaggerNo); 
+    this->DhopDeriv(mat,D,V,DaggerNo); 
   };
-   void ContinuedFractionFermion5D::MoeDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+    template<class Impl>
+   void ContinuedFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion D(V._grid);
+    int Ls = this->Ls;
+
+    FermionField D(V._grid);
 
     int sign=1;
     for(int s=0;s<Ls;s++){
@@ -199,11 +227,14 @@ namespace Grid {
       }
       sign=-sign; 
     }
-    DhopDerivOE(mat,D,V,DaggerNo); 
+    this->DhopDerivOE(mat,D,V,DaggerNo); 
   };
-   void ContinuedFractionFermion5D::MeoDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+  template<class Impl>
+  void ContinuedFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion D(V._grid);
+    int Ls = this->Ls;
+
+    FermionField D(V._grid);
 
     int sign=1;
     for(int s=0;s<Ls;s++){
@@ -214,25 +245,29 @@ namespace Grid {
       }
       sign=-sign; 
     }
-    DhopDerivEO(mat,D,V,DaggerNo); 
+    this->DhopDerivEO(mat,D,V,DaggerNo); 
   };
     
     // Constructors
-    ContinuedFractionFermion5D::ContinuedFractionFermion5D(
-							   LatticeGaugeField &_Umu,
+    template<class Impl>
+    ContinuedFractionFermion5D<Impl>::ContinuedFractionFermion5D(
+							   GaugeField &_Umu,
 							   GridCartesian         &FiveDimGrid,
 							   GridRedBlackCartesian &FiveDimRedBlackGrid,
 							   GridCartesian         &FourDimGrid,
 							   GridRedBlackCartesian &FourDimRedBlackGrid,
 							   RealD _mass,RealD M5) :
-      WilsonFermion5D(_Umu,
+      WilsonFermion5D<Impl>(_Umu,
 		      FiveDimGrid, FiveDimRedBlackGrid,
 		      FourDimGrid, FourDimRedBlackGrid,M5),
       mass(_mass)
     {
+      int Ls = this->Ls;
       assert((Ls&0x1)==1); // Odd Ls required
     }
 
+    FermOpTemplateInstantiate(ContinuedFractionFermion5D);
+
   }
 }
 
diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
index 7d0c75bc..b1a7fba2 100644
--- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
+++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h
@@ -5,35 +5,38 @@ namespace Grid {
 
   namespace QCD {
 
-    class ContinuedFractionFermion5D : public WilsonFermion5D
+    template<class Impl>
+    class ContinuedFractionFermion5D : public WilsonFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  M    (const FermionField &in, FermionField &out);
+      virtual RealD  Mdag (const FermionField &in, FermionField &out);
 
       // half checkerboard operaions
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Meooe       (const FermionField &in, FermionField &out);
+      virtual void   MeooeDag    (const FermionField &in, FermionField &out);
+      virtual void   Mooee       (const FermionField &in, FermionField &out);
+      virtual void   MooeeDag    (const FermionField &in, FermionField &out);
+      virtual void   MooeeInv    (const FermionField &in, FermionField &out);
+      virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
 
       // force terms; five routines; default to Dhop on diagonal
-      virtual void MDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void MoeDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void MeoDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
+      virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 
       //      virtual void   Instantiatable(void)=0;
       virtual void   Instantiatable(void) =0;
 
       // Efficient support for multigrid coarsening
-      virtual void  Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
+      virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp);
 
       // Constructors
-      ContinuedFractionFermion5D(LatticeGaugeField &_Umu,
+      ContinuedFractionFermion5D(GaugeField &_Umu,
 				 GridCartesian         &FiveDimGrid,
 				 GridRedBlackCartesian &FiveDimRedBlackGrid,
 				 GridCartesian         &FourDimGrid,
diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h
index 498d8b34..00a6baba 100644
--- a/lib/qcd/action/fermion/DomainWallFermion.h
+++ b/lib/qcd/action/fermion/DomainWallFermion.h
@@ -7,24 +7,27 @@ namespace Grid {
 
   namespace QCD {
 
-    class DomainWallFermion : public CayleyFermion5D
+    template<class Impl>
+    class DomainWallFermion : public CayleyFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void) {};
       // Constructors
-      DomainWallFermion(LatticeGaugeField &_Umu,
+      DomainWallFermion(GaugeField &_Umu,
 			GridCartesian         &FiveDimGrid,
 			GridRedBlackCartesian &FiveDimRedBlackGrid,
 			GridCartesian         &FourDimGrid,
 			GridRedBlackCartesian &FourDimRedBlackGrid,
 			RealD _mass,RealD _M5) : 
 
-      CayleyFermion5D(_Umu,
-		      FiveDimGrid,
-		      FiveDimRedBlackGrid,
-		      FourDimGrid,
-		      FourDimRedBlackGrid,_mass,_M5)
+      CayleyFermion5D<Impl>(_Umu,
+			    FiveDimGrid,
+			    FiveDimRedBlackGrid,
+			    FourDimGrid,
+			    FourDimRedBlackGrid,_mass,_M5)
 
       {
 	RealD eps = 1.0;
@@ -32,9 +35,9 @@ namespace Grid {
 	Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
 	assert(zdata->n==this->Ls);
 	
-	std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<Ls<<std::endl;
+	std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
 	// Call base setter
-	this->CayleyFermion5D::SetCoefficientsTanh(zdata,1.0,0.0);
+	this->SetCoefficientsTanh(zdata,1.0,0.0);
 
 	Approx::zolotarev_free(zdata);
       }
diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h
index 57040037..75641b83 100644
--- a/lib/qcd/action/fermion/FermionOperator.h
+++ b/lib/qcd/action/fermion/FermionOperator.h
@@ -5,15 +5,169 @@ namespace Grid {
 
   namespace QCD {
 
+    ////////////////////////////////////////////////////////////////
+    // Hardwire to four spinors, allow to select 
+    // between gauge representation rank, and gparity/flavour index,
+    // and single/double precision.
+    ////////////////////////////////////////////////////////////////
+
+    template<class S,int Nrepresentation=Nc>
+    class WilsonImpl { 
+    public:
+
+      typedef S Simd;
+
+      template<typename vtype> using iImplSpinor             = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
+      template<typename vtype> using iImplHalfSpinor         = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
+
+      template<typename vtype> using iImplGaugeLink          = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
+      template<typename vtype> using iImplGaugeField         = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd  >;
+      template<typename vtype> using iImplDoubledGaugeField  = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
+    
+      typedef iImplSpinor    <Simd>           SiteSpinor;
+      typedef iImplHalfSpinor<Simd>           SiteHalfSpinor;
+      typedef iImplGaugeLink <Simd>           SiteGaugeLink;
+      typedef iImplGaugeField<Simd>           SiteGaugeField;
+      typedef iImplDoubledGaugeField<Simd>    SiteDoubledGaugeField;
+
+      typedef Lattice<SiteSpinor>                 FermionField;
+      typedef Lattice<SiteGaugeLink>            GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
+      typedef Lattice<SiteGaugeField>               GaugeField;
+      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
+
+      typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
+
+      // provide the multiply by link that is differentiated between Gparity (with flavour index) and 
+      // non-Gparity
+      static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu){
+        mult(&phi(),&U(mu),&chi());
+      }
+
+      static inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
+      {
+        conformable(Uds._grid,GaugeGrid);
+        conformable(Umu._grid,GaugeGrid);
+        GaugeLinkField U(GaugeGrid);
+        for(int mu=0;mu<Nd;mu++){
+  	  U = PeekIndex<LorentzIndex>(Umu,mu);
+	  PokeIndex<LorentzIndex>(Uds,U,mu);
+	  U = adj(Cshift(U,mu,-1));
+	  PokeIndex<LorentzIndex>(Uds,U,mu+4);
+	}
+      }
+
+    };
+
+    typedef WilsonImpl<vComplex,Nc>  WilsonImplR; // Real.. whichever prec
+    typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
+    typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
+
+
+
+    template<class S,int Nrepresentation=Nc>
+    class GparityWilsonImpl { 
+    public:
+
+      typedef S Simd;
+
+      template<typename vtype> using iImplSpinor             = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >;
+      template<typename vtype> using iImplHalfSpinor         = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
+      template<typename vtype> using iImplGaugeField         = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd  >;
+
+      template<typename vtype> using iImplGaugeLink          = iScalar<iScalar<iScalar<iMatrix<vtype, Nrepresentation> > > >;
+      template<typename vtype> using iImplDoubledGaugeField  = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
+    
+      typedef iImplSpinor    <Simd>           SiteSpinor;
+      typedef iImplHalfSpinor<Simd>           SiteHalfSpinor;
+      typedef iImplGaugeLink <Simd>           SiteGaugeLink;
+      typedef iImplGaugeField<Simd>           SiteGaugeField;
+      typedef iImplDoubledGaugeField<Simd>    SiteDoubledGaugeField;
+
+      typedef Lattice<SiteSpinor>                 FermionField;
+      typedef Lattice<SiteGaugeLink>            GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
+      typedef Lattice<SiteGaugeField>               GaugeField;
+      typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
+
+      typedef GparityWilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
+
+      // provide the multiply by link that is differentiated between Gparity (with flavour index) and 
+      // non-Gparity
+      static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu){
+	for(int f=0;f<Ngp;f++){
+	  mult(&phi(f),&U(f)(mu),&chi(f));
+	}
+      }
+
+      static inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
+      {
+        conformable(Uds._grid,GaugeGrid);
+        conformable(Umu._grid,GaugeGrid);
+
+        GaugeLinkField Utmp(GaugeGrid);
+        GaugeLinkField U(GaugeGrid);
+        GaugeLinkField Uconj(GaugeGrid);
+
+	Lattice<iScalar<vInteger> > coor(GaugeGrid);
+	
+	std::vector<int> gpdirs({1,0,0,0});
+
+        for(int mu=0;mu<Nd;mu++){
+
+	  LatticeCoordinate(coor,mu);
+
+  	  U     = PeekIndex<LorentzIndex>(Umu,mu);
+	  Uconj = conj(U);
+
+	  int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
+
+	  if ( gpdirs[mu] ) { 
+	    Uconj = where(coor==neglink,-Uconj,Uconj);
+	  }
+PARALLEL_FOR_LOOP
+	  for(auto ss=U.begin();ss<U.end();ss++){
+	    Uds[ss](0)(mu) = U[ss]();
+	    Uds[ss](1)(mu) = Uconj[ss]();
+	  }
+
+
+	  U     = adj(Cshift(U,mu,-1));      // correct except for spanning the boundary
+	  Uconj = adj(Cshift(Uconj,mu,-1));
+
+	  Utmp = where(coor==0,U,Uconj);
+PARALLEL_FOR_LOOP
+	  for(auto ss=U.begin();ss<U.end();ss++){
+	    Uds[ss](1)(mu) = Utmp[ss]();
+	  }
+
+	  Utmp = where(coor==0,Uconj,U);
+PARALLEL_FOR_LOOP
+	  for(auto ss=U.begin();ss<U.end();ss++){
+	    Uds[ss](0)(mu) = Utmp[ss]();
+	  }
+
+
+	}
+      }
+
+    };
+
+    typedef WilsonImpl<vComplex,Nc>  WilsonImplR; // Real.. whichever prec
+    typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
+    typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
+
+
+    
     //////////////////////////////////////////////////////////////////////////////
     // Four component fermions
     // Should type template the vector and gauge types
     // Think about multiple representations
     //////////////////////////////////////////////////////////////////////////////
-    template<class FermionField,class GaugeField>
-    class FermionOperator : public CheckerBoardedSparseMatrixBase<FermionField>
+    template<class Impl>
+    class FermionOperator : public CheckerBoardedSparseMatrixBase<typename Impl::FermionField>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       GridBase * Grid(void)   { return FermionGrid(); };   // this is all the linalg routines need to know
       GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); };
@@ -42,15 +196,15 @@ namespace Grid {
       virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D
 
       // force terms; five routines; default to Dhop on diagonal
-      virtual void MDeriv  (LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
-      virtual void MoeDeriv(LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
-      virtual void MeoDeriv(LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
-      virtual void MooDeriv(LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
-      virtual void MeeDeriv(LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
+      virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
+      virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
+      virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
+      virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
+      virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
 
-      virtual void DhopDeriv  (LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
-      virtual void DhopDerivEO(LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
-      virtual void DhopDerivOE(LatticeGaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
+      virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
+      virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
+      virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
 
 
       virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's
@@ -65,4 +219,5 @@ namespace Grid {
 
   }
 }
+
 #endif
diff --git a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h
index 5aaaa188..e84661dc 100644
--- a/lib/qcd/action/fermion/MobiusFermion.h
+++ b/lib/qcd/action/fermion/MobiusFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class MobiusFermion : public CayleyFermion5D
+    template<class Impl>
+    class MobiusFermion : public CayleyFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void) {};
       // Constructors
-      MobiusFermion(LatticeGaugeField &_Umu,
+      MobiusFermion(GaugeField &_Umu,
 		    GridCartesian         &FiveDimGrid,
 		    GridRedBlackCartesian &FiveDimRedBlackGrid,
 		    GridCartesian         &FourDimGrid,
@@ -21,7 +24,7 @@ namespace Grid {
 		    RealD _mass,RealD _M5,
 		    RealD b, RealD c) : 
       
-      CayleyFermion5D(_Umu,
+      CayleyFermion5D<Impl>(_Umu,
 		      FiveDimGrid,
 		      FiveDimRedBlackGrid,
 		      FourDimGrid,
@@ -30,12 +33,12 @@ namespace Grid {
       {
 	RealD eps = 1.0;
 
-	std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Tanh approx"<<std::endl;
+	std::cout<<GridLogMessage << "MobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Tanh approx"<<std::endl;
 	Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
 	assert(zdata->n==this->Ls);
 	
 	// Call base setter
-	this->CayleyFermion5D::SetCoefficientsTanh(zdata,b,c);
+	this->SetCoefficientsTanh(zdata,b,c);
 
 	Approx::zolotarev_free(zdata);
  
diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
index 5f74702c..1c572326 100644
--- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h
+++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class MobiusZolotarevFermion : public CayleyFermion5D
+    template<class Impl>
+    class MobiusZolotarevFermion : public CayleyFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void) {};
       // Constructors
-       MobiusZolotarevFermion(LatticeGaugeField &_Umu,
+       MobiusZolotarevFermion(GaugeField &_Umu,
 			      GridCartesian         &FiveDimGrid,
 			      GridRedBlackCartesian &FiveDimRedBlackGrid,
 			      GridCartesian         &FourDimGrid,
@@ -22,7 +25,7 @@ namespace Grid {
 			      RealD b, RealD c,
 			      RealD lo, RealD hi) : 
       
-      CayleyFermion5D(_Umu,
+      CayleyFermion5D<Impl>(_Umu,
 		      FiveDimGrid,
 		      FiveDimRedBlackGrid,
 		      FourDimGrid,
@@ -34,10 +37,10 @@ namespace Grid {
 	Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0);
 	assert(zdata->n==this->Ls);
 
-	std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
+	std::cout<<GridLogMessage << "MobiusZolotarevFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" Zolotarev range ["<<lo<<","<<hi<<"]"<<std::endl;
 	
 	// Call base setter
-	this->CayleyFermion5D::SetCoefficientsZolotarev(hi,zdata,b,c);
+	this->SetCoefficientsZolotarev(hi,zdata,b,c);
  
 	Approx::zolotarev_free(zdata);
       }
diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h
index e764c8ae..0e8d4e19 100644
--- a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h
@@ -7,12 +7,15 @@ namespace Grid {
 
   namespace QCD {
 
-    class OverlapWilsonCayleyTanhFermion : public MobiusFermion
+    template<class Impl>
+    class OverlapWilsonCayleyTanhFermion : public MobiusFermion<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       // Constructors
-    OverlapWilsonCayleyTanhFermion(LatticeGaugeField &_Umu,
+    OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
 				   GridCartesian         &FiveDimGrid,
 				   GridRedBlackCartesian &FiveDimRedBlackGrid,
 				   GridCartesian         &FourDimGrid,
@@ -21,11 +24,11 @@ namespace Grid {
 				   RealD scale) :
       
       // b+c=scale, b-c = 0 <=> b =c = scale/2
-      MobiusFermion(_Umu,
-		    FiveDimGrid,
-		    FiveDimRedBlackGrid,
-		    FourDimGrid,
-		    FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale)
+      MobiusFermion<Impl>(_Umu,
+			  FiveDimGrid,
+			  FiveDimRedBlackGrid,
+			  FourDimGrid,
+			  FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale)
 	{
 	}
     };
diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h
index 82c43fb7..549292db 100644
--- a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion
+    template<class Impl>
+    class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       // Constructors
 
-    OverlapWilsonCayleyZolotarevFermion(LatticeGaugeField &_Umu,
+    OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
 					GridCartesian         &FiveDimGrid,
 					GridRedBlackCartesian &FiveDimRedBlackGrid,
 					GridCartesian         &FourDimGrid,
@@ -21,7 +24,7 @@ namespace Grid {
 					RealD _mass,RealD _M5,
 					RealD lo, RealD hi) : 
       // b+c=1.0, b-c = 0 <=> b =c = 1/2
-      MobiusZolotarevFermion(_Umu,
+      MobiusZolotarevFermion<Impl>(_Umu,
 			     FiveDimGrid,
 			     FiveDimRedBlackGrid,
 			     FourDimGrid,
diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
index 6ecd7822..73baffa7 100644
--- a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D
+    template<class Impl>
+    class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void){};
       // Constructors
-    OverlapWilsonContFracTanhFermion(LatticeGaugeField &_Umu,
+    OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
 				     GridCartesian         &FiveDimGrid,
 				     GridRedBlackCartesian &FiveDimRedBlackGrid,
 				     GridCartesian         &FourDimGrid,
@@ -22,16 +25,16 @@ namespace Grid {
 				     RealD scale) :
       
       // b+c=scale, b-c = 0 <=> b =c = scale/2
-      ContinuedFractionFermion5D(_Umu,
+      ContinuedFractionFermion5D<Impl>(_Umu,
 				 FiveDimGrid,
 				 FiveDimRedBlackGrid,
 				 FourDimGrid,
 				 FourDimRedBlackGrid,_mass,_M5)
 	{
-	  assert((Ls&0x1)==1); // Odd Ls required
-	  int nrational=Ls-1;// Even rational order
+	  assert((this->Ls&0x1)==1); // Odd Ls required
+	  int nrational=this->Ls-1;// Even rational order
 	  Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
-	  SetCoefficientsTanh(zdata,scale);
+	  this->SetCoefficientsTanh(zdata,scale);
 	  Approx::zolotarev_free(zdata);
 	}
     };
diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
index 595daef3..3c662e92 100644
--- a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D
+    template<class Impl>
+    class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void){};
       // Constructors
-    OverlapWilsonContFracZolotarevFermion(LatticeGaugeField &_Umu,
+    OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
 					  GridCartesian         &FiveDimGrid,
 					  GridRedBlackCartesian &FiveDimRedBlackGrid,
 					  GridCartesian         &FourDimGrid,
@@ -22,19 +25,19 @@ namespace Grid {
 					  RealD lo,RealD hi):
       
       // b+c=scale, b-c = 0 <=> b =c = scale/2
-      ContinuedFractionFermion5D(_Umu,
+      ContinuedFractionFermion5D<Impl>(_Umu,
 				 FiveDimGrid,
 				 FiveDimRedBlackGrid,
 				 FourDimGrid,
 				 FourDimRedBlackGrid,_mass,_M5)
 	{
-	  assert((Ls&0x1)==1); // Odd Ls required
+	  assert((this->Ls&0x1)==1); // Odd Ls required
 
-	  int nrational=Ls;// Odd rational order
+	  int nrational=this->Ls;// Odd rational order
 	  RealD eps = lo/hi;
 
 	  Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
-	  SetCoefficientsZolotarev(hi,zdata);
+	  this->SetCoefficientsZolotarev(hi,zdata);
 	  Approx::zolotarev_free(zdata);
 
 	}
diff --git a/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h
index 1fa6c099..b017a9d7 100644
--- a/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D
+    template<class Impl>
+    class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void){};
       // Constructors
-    OverlapWilsonPartialFractionTanhFermion(LatticeGaugeField &_Umu,
+    OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
 					    GridCartesian         &FiveDimGrid,
 					    GridRedBlackCartesian &FiveDimRedBlackGrid,
 					    GridCartesian         &FourDimGrid,
@@ -22,16 +25,16 @@ namespace Grid {
 					    RealD scale) :
       
       // b+c=scale, b-c = 0 <=> b =c = scale/2
-      PartialFractionFermion5D(_Umu,
-			       FiveDimGrid,
-			       FiveDimRedBlackGrid,
-			       FourDimGrid,
-			       FourDimRedBlackGrid,_mass,_M5)
+      PartialFractionFermion5D<Impl>(_Umu,
+				     FiveDimGrid,
+				     FiveDimRedBlackGrid,
+				     FourDimGrid,
+				     FourDimRedBlackGrid,_mass,_M5)
 	{
-	  assert((Ls&0x1)==1); // Odd Ls required
-	  int nrational=Ls-1;// Even rational order
+	  assert((this->Ls&0x1)==1); // Odd Ls required
+	  int nrational=this->Ls-1;// Even rational order
 	  Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham
-	  SetCoefficientsTanh(zdata,scale);
+	  this->SetCoefficientsTanh(zdata,scale);
 	  Approx::zolotarev_free(zdata);
 	}
     };
diff --git a/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h
index 3997c17f..3294f079 100644
--- a/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h
+++ b/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h
@@ -7,13 +7,16 @@ namespace Grid {
 
   namespace QCD {
 
-    class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D
+    template<class Impl>
+    class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       virtual void   Instantiatable(void){};
       // Constructors
-    OverlapWilsonPartialFractionZolotarevFermion(LatticeGaugeField &_Umu,
+    OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
 					  GridCartesian         &FiveDimGrid,
 					  GridRedBlackCartesian &FiveDimRedBlackGrid,
 					  GridCartesian         &FourDimGrid,
@@ -22,19 +25,19 @@ namespace Grid {
 					  RealD lo,RealD hi):
       
       // b+c=scale, b-c = 0 <=> b =c = scale/2
-      PartialFractionFermion5D(_Umu,
-			       FiveDimGrid,
-			       FiveDimRedBlackGrid,
-			       FourDimGrid,
-			       FourDimRedBlackGrid,_mass,_M5)
+      PartialFractionFermion5D<Impl>(_Umu,
+				     FiveDimGrid,
+				     FiveDimRedBlackGrid,
+				     FourDimGrid,
+				     FourDimRedBlackGrid,_mass,_M5)
 	{
-	  assert((Ls&0x1)==1); // Odd Ls required
+	  assert((this->Ls&0x1)==1); // Odd Ls required
 
-	  int nrational=Ls;// Odd rational order
+	  int nrational=this->Ls;// Odd rational order
 	  RealD eps = lo/hi;
 
 	  Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0);
-	  SetCoefficientsZolotarev(hi,zdata);
+	  this->SetCoefficientsZolotarev(hi,zdata);
 	  Approx::zolotarev_free(zdata);
 
 	}
diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc
index 74174f51..81afb653 100644
--- a/lib/qcd/action/fermion/PartialFractionFermion5D.cc
+++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc
@@ -2,12 +2,15 @@
 namespace Grid {
   namespace QCD {
 
-    void  PartialFractionFermion5D::Mdir (const LatticeFermion &psi, LatticeFermion &chi,int dir,int disp){
+
+    template<class Impl>
+    void  PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
       // this does both dag and undag but is trivial; make a common helper routing
 
       int sign = 1;
+      int Ls = this->Ls;
 
-      DhopDir(psi,chi,dir,disp);
+      this->DhopDir(psi,chi,dir,disp);
 
       int nblock=(Ls-1)/2;
       for(int b=0;b<nblock;b++){
@@ -18,15 +21,16 @@ namespace Grid {
       ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
 
     }
-    void   PartialFractionFermion5D::Meooe_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    template<class Impl>
+    void   PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
     {
-      // this does both dag and undag but is trivial; make a common helper routing
+      int Ls = this->Ls;
       int sign = dag ? (-1) : 1;
 
       if ( psi.checkerboard == Odd ) {
-	DhopEO(psi,chi,DaggerNo);
+	this->DhopEO(psi,chi,DaggerNo);
       } else {
-	DhopOE(psi,chi,DaggerNo);
+	this->DhopOE(psi,chi,DaggerNo);
       }
 
       int nblock=(Ls-1)/2;
@@ -38,10 +42,12 @@ namespace Grid {
       ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
     }
 
-    void   PartialFractionFermion5D::Mooee_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    template<class Impl>
+    void   PartialFractionFermion5D<Impl>::Mooee_internal(const FermionField &psi, FermionField &chi,int dag)
     {
       // again dag and undag are trivially related
       int sign = dag ? (-1) : 1;
+      int Ls = this->Ls;
       
       int nblock=(Ls-1)/2;
       for(int b=0;b<nblock;b++){
@@ -69,11 +75,13 @@ namespace Grid {
       }
     }
 
-    void   PartialFractionFermion5D::MooeeInv_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    template<class Impl>
+    void   PartialFractionFermion5D<Impl>::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag)
     {
       int sign = dag ? (-1) : 1;
+      int Ls = this->Ls;
 
-      LatticeFermion tmp(psi._grid);
+      FermionField tmp(psi._grid);
       
       ///////////////////////////////////////////////////////////////////////////////////////
       //Linv
@@ -129,10 +137,12 @@ namespace Grid {
       axpby_ssp  (chi, 1.0/scale,tmp,0.0,tmp,Ls-1,Ls-1);
     }
 
-    void   PartialFractionFermion5D::M_internal(const LatticeFermion &psi, LatticeFermion &chi,int dag)
+    template<class Impl>
+    void   PartialFractionFermion5D<Impl>::M_internal(const FermionField &psi, FermionField &chi,int dag)
     {
-      LatticeFermion D(psi._grid);
+      FermionField D(psi._grid);
   
+      int Ls = this->Ls;
       int sign = dag ? (-1) : 1;
 
       // For partial frac Hw case (b5=c5=1) chroma quirkily computes
@@ -186,7 +196,7 @@ namespace Grid {
       //           ( 0     -sqrt(p_i)*amax   |  2 R gamma_5 + p0/amax 2H
       //
 
-      DW(psi,D,DaggerNo); 
+      this->DW(psi,D,DaggerNo); 
 
       int nblock=(Ls-1)/2;
       for(int b=0;b<nblock;b++){
@@ -217,48 +227,59 @@ namespace Grid {
 
     }
 
-    RealD  PartialFractionFermion5D::M    (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    RealD  PartialFractionFermion5D<Impl>::M    (const FermionField &in, FermionField &out)
     {
       M_internal(in,out,DaggerNo);
       return norm2(out);
     }
-    RealD  PartialFractionFermion5D::Mdag (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    RealD  PartialFractionFermion5D<Impl>::Mdag (const FermionField &in, FermionField &out)
     {
       M_internal(in,out,DaggerYes);
       return norm2(out);
     }
 
-    void PartialFractionFermion5D::Meooe       (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    void PartialFractionFermion5D<Impl>::Meooe       (const FermionField &in, FermionField &out)
     {
       Meooe_internal(in,out,DaggerNo);
     }
-    void PartialFractionFermion5D::MeooeDag    (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    void PartialFractionFermion5D<Impl>::MeooeDag    (const FermionField &in, FermionField &out)
     {
       Meooe_internal(in,out,DaggerYes);
     }
-    void PartialFractionFermion5D::Mooee       (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    void PartialFractionFermion5D<Impl>::Mooee       (const FermionField &in, FermionField &out)
     {
       Mooee_internal(in,out,DaggerNo);
     }
-    void PartialFractionFermion5D::MooeeDag    (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    void PartialFractionFermion5D<Impl>::MooeeDag    (const FermionField &in, FermionField &out)
     {
       Mooee_internal(in,out,DaggerYes);
     }
 
-    void PartialFractionFermion5D::MooeeInv    (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    void PartialFractionFermion5D<Impl>::MooeeInv    (const FermionField &in, FermionField &out)
     {
       MooeeInv_internal(in,out,DaggerNo);
     }
-    void PartialFractionFermion5D::MooeeInvDag (const LatticeFermion &in, LatticeFermion &out)
+    template<class Impl>
+    void PartialFractionFermion5D<Impl>::MooeeInvDag (const FermionField &in, FermionField &out)
     {
       MooeeInv_internal(in,out,DaggerYes);
     }
 
 
   // force terms; five routines; default to Dhop on diagonal
-   void PartialFractionFermion5D::MDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+    template<class Impl>
+   void PartialFractionFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion D(V._grid);
+    int Ls = this->Ls;
+
+    FermionField D(V._grid);
 
     int nblock=(Ls-1)/2;
     for(int b=0;b<nblock;b++){
@@ -268,11 +289,14 @@ namespace Grid {
     }
     ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
 
-    DhopDeriv(mat,D,V,DaggerNo); 
+    this->DhopDeriv(mat,D,V,DaggerNo); 
   };
-   void PartialFractionFermion5D::MoeDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+    template<class Impl>
+   void PartialFractionFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion D(V._grid);
+    int Ls = this->Ls;
+
+    FermionField D(V._grid);
 
     int nblock=(Ls-1)/2;
     for(int b=0;b<nblock;b++){
@@ -282,11 +306,14 @@ namespace Grid {
     }
     ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
 
-    DhopDerivOE(mat,D,V,DaggerNo); 
+    this->DhopDerivOE(mat,D,V,DaggerNo); 
   };
-   void PartialFractionFermion5D::MeoDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
+    template<class Impl>
+   void PartialFractionFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
   {
-    LatticeFermion D(V._grid);
+    int Ls = this->Ls;
+
+    FermionField D(V._grid);
 
     int nblock=(Ls-1)/2;
     for(int b=0;b<nblock;b++){
@@ -296,13 +323,15 @@ namespace Grid {
     }
     ag5xpby_ssp(D,p[nblock]*scale/amax,U,0.0,U,Ls-1,Ls-1);
 
-    DhopDerivEO(mat,D,V,DaggerNo); 
+    this->DhopDerivEO(mat,D,V,DaggerNo); 
   };
 
-    void  PartialFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
+    template<class Impl>
+    void  PartialFractionFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){
       SetCoefficientsZolotarev(1.0/scale,zdata);
     }
-    void  PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
+    template<class Impl>
+    void  PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){
 
       // check on degree matching
       //      std::cout<<GridLogMessage << Ls << " Ls"<<std::endl;
@@ -311,12 +340,14 @@ namespace Grid {
       //      std::cout<<GridLogMessage << zdata->db << " -db"<<std::endl;
       //      std::cout<<GridLogMessage << zdata->dn << " -dn"<<std::endl;
       //      std::cout<<GridLogMessage << zdata->dd << " -dd"<<std::endl;
+      int Ls = this->Ls;
+
       assert(Ls == (2*zdata->da -1) );
 
       // Part frac
       //      RealD R;
       R=(1+mass)/(1-mass);
-      dw_diag = (4.0-M5);
+      dw_diag = (4.0-this->M5);
 
       //      std::vector<RealD> p; 
       //      std::vector<RealD> q;
@@ -336,18 +367,21 @@ namespace Grid {
     }
 
       // Constructors
-    PartialFractionFermion5D::PartialFractionFermion5D(LatticeGaugeField &_Umu,
+    template<class Impl>
+    PartialFractionFermion5D<Impl>::PartialFractionFermion5D(GaugeField &_Umu,
 						       GridCartesian         &FiveDimGrid,
 						       GridRedBlackCartesian &FiveDimRedBlackGrid,
 						       GridCartesian         &FourDimGrid,
 						       GridRedBlackCartesian &FourDimRedBlackGrid,
 						       RealD _mass,RealD M5) :
-      WilsonFermion5D(_Umu,
+      WilsonFermion5D<Impl>(_Umu,
 		      FiveDimGrid, FiveDimRedBlackGrid,
 		      FourDimGrid, FourDimRedBlackGrid,M5),
       mass(_mass)
 
     {
+      int Ls = this->Ls;
+
       assert((Ls&0x1)==1); // Odd Ls required
       int nrational=Ls-1;
 
@@ -366,6 +400,8 @@ namespace Grid {
 
     }
  
+    FermOpTemplateInstantiate(PartialFractionFermion5D);
+
  }
 }
 
diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h b/lib/qcd/action/fermion/PartialFractionFermion5D.h
index 3ba2481a..acbc30e1 100644
--- a/lib/qcd/action/fermion/PartialFractionFermion5D.h
+++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h
@@ -5,46 +5,49 @@ namespace Grid {
 
   namespace QCD {
 
-    class PartialFractionFermion5D : public WilsonFermion5D
+    template<class Impl>
+    class PartialFractionFermion5D : public WilsonFermion5D<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       const int part_frac_chroma_convention=1;
 
-      void   Meooe_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void   Mooee_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void   MooeeInv_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void   M_internal(const LatticeFermion &in, LatticeFermion &out,int dag);
+      void   Meooe_internal(const FermionField &in, FermionField &out,int dag);
+      void   Mooee_internal(const FermionField &in, FermionField &out,int dag);
+      void   MooeeInv_internal(const FermionField &in, FermionField &out,int dag);
+      void   M_internal(const FermionField &in, FermionField &out,int dag);
 
       // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+      virtual RealD  M    (const FermionField &in, FermionField &out);
+      virtual RealD  Mdag (const FermionField &in, FermionField &out);
 
       // half checkerboard operaions
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out);
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+      virtual void   Meooe       (const FermionField &in, FermionField &out);
+      virtual void   MeooeDag    (const FermionField &in, FermionField &out);
+      virtual void   Mooee       (const FermionField &in, FermionField &out);
+      virtual void   MooeeDag    (const FermionField &in, FermionField &out);
+      virtual void   MooeeInv    (const FermionField &in, FermionField &out);
+      virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
 
       // force terms; five routines; default to Dhop on diagonal
-      virtual void MDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void MoeDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void MeoDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
+      virtual void MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 
       virtual void   Instantiatable(void) =0; // ensure no make-eee
 
       // Efficient support for multigrid coarsening
-      virtual void  Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
+      virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp);
 
       // Constructors
-      PartialFractionFermion5D(LatticeGaugeField &_Umu,
-				    GridCartesian         &FiveDimGrid,
-				    GridRedBlackCartesian &FiveDimRedBlackGrid,
-				    GridCartesian         &FourDimGrid,
-				    GridRedBlackCartesian &FourDimRedBlackGrid,
-				    RealD _mass,RealD M5);
+      PartialFractionFermion5D(GaugeField &_Umu,
+			       GridCartesian         &FiveDimGrid,
+			       GridRedBlackCartesian &FiveDimRedBlackGrid,
+			       GridCartesian         &FourDimGrid,
+			       GridRedBlackCartesian &FourDimRedBlackGrid,
+			       RealD _mass,RealD M5);
 
     protected:
 
diff --git a/lib/qcd/action/fermion/ScaledShamirFermion.h b/lib/qcd/action/fermion/ScaledShamirFermion.h
index 59fb16a8..de7ded0c 100644
--- a/lib/qcd/action/fermion/ScaledShamirFermion.h
+++ b/lib/qcd/action/fermion/ScaledShamirFermion.h
@@ -7,12 +7,15 @@ namespace Grid {
 
   namespace QCD {
 
-    class ScaledShamirFermion : public MobiusFermion
+    template<class Impl>
+    class ScaledShamirFermion : public MobiusFermion<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       // Constructors
-    ScaledShamirFermion(LatticeGaugeField &_Umu,
+    ScaledShamirFermion(GaugeField &_Umu,
 			GridCartesian         &FiveDimGrid,
 			GridRedBlackCartesian &FiveDimRedBlackGrid,
 			GridCartesian         &FourDimGrid,
@@ -21,7 +24,7 @@ namespace Grid {
 			RealD scale) :
       
       // b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
-      MobiusFermion(_Umu,
+      MobiusFermion<Impl>(_Umu,
 		    FiveDimGrid,
 		    FiveDimRedBlackGrid,
 		    FourDimGrid,
diff --git a/lib/qcd/action/fermion/ShamirZolotarevFermion.h b/lib/qcd/action/fermion/ShamirZolotarevFermion.h
index 6a7df439..339a37f1 100644
--- a/lib/qcd/action/fermion/ShamirZolotarevFermion.h
+++ b/lib/qcd/action/fermion/ShamirZolotarevFermion.h
@@ -7,14 +7,17 @@ namespace Grid {
 
   namespace QCD {
 
-    class ShamirZolotarevFermion : public MobiusZolotarevFermion
+    template<class Impl>
+    class ShamirZolotarevFermion : public MobiusZolotarevFermion<Impl>
     {
     public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+    public:
 
       // Constructors
 
 
-    ShamirZolotarevFermion(LatticeGaugeField &_Umu,
+    ShamirZolotarevFermion(GaugeField &_Umu,
 			   GridCartesian         &FiveDimGrid,
 			   GridRedBlackCartesian &FiveDimRedBlackGrid,
 			   GridCartesian         &FourDimGrid,
@@ -23,7 +26,7 @@ namespace Grid {
 			   RealD lo, RealD hi) : 
       
       // b+c = 1; b-c = 1 => b=1, c=0
-      MobiusZolotarevFermion(_Umu,
+      MobiusZolotarevFermion<Impl>(_Umu,
 			     FiveDimGrid,
 			     FiveDimRedBlackGrid,
 			     FourDimGrid,
diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h
index d9fe977a..c7a20c59 100644
--- a/lib/qcd/action/fermion/WilsonCompressor.h
+++ b/lib/qcd/action/fermion/WilsonCompressor.h
@@ -4,6 +4,7 @@
 namespace Grid {
 namespace QCD {
 
+  template<class SiteHalfSpinor,class SiteSpinor>
   class WilsonCompressor {
   public:
     int mu;
@@ -18,9 +19,13 @@ namespace QCD {
       mu=p;
     };
 
-    vHalfSpinColourVector operator () (const vSpinColourVector &in)
+    virtual SiteHalfSpinor operator () (const SiteSpinor &in) {
+      return spinproject(in);
+    }
+
+    SiteHalfSpinor spinproject(const SiteSpinor &in)
     {
-      vHalfSpinColourVector ret;
+      SiteHalfSpinor ret;
       int mudag=mu;
       if (dag) {
 	mudag=(mu+Nd)%(2*Nd);
@@ -57,5 +62,33 @@ namespace QCD {
       return ret;
     }
   };
+
+
+  template<class SiteHalfSpinor,class SiteSpinor>
+    class GparityWilsonCompressor : public WilsonCompressor<SiteHalfSpinor,SiteSpinor>{
+  public:
+
+    GparityWilsonCompressor(int _dag) : WilsonCompressor<SiteHalfSpinor,SiteSpinor> (_dag){};
+
+    SiteHalfSpinor operator () (const SiteSpinor &in)
+    {
+      SiteHalfSpinor tmp = spinproject(in);
+      if( 0 ) tmp = flavourflip(tmp);
+      return tmp;
+    }
+
+    SiteHalfSpinor flavourflip(const SiteHalfSpinor &in) {
+
+      SiteHalfSpinor ret;
+      for(int f=0;f<Ngp;f++){
+	ret(0) = in(1);
+	ret(1) = in(0);
+      }
+      return ret;
+    }
+
+  };
+
+
 }} // namespace close
 #endif
diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc
index 2c2132d1..260e3851 100644
--- a/lib/qcd/action/fermion/WilsonFermion.cc
+++ b/lib/qcd/action/fermion/WilsonFermion.cc
@@ -3,269 +3,290 @@
 namespace Grid {
 namespace QCD {
 
-const std::vector<int> WilsonFermion::directions   ({0,1,2,3, 0, 1, 2, 3});
-const std::vector<int> WilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1});
+  const std::vector<int> WilsonFermionStatic::directions   ({0,1,2,3, 0, 1, 2, 3});
+  const std::vector<int> WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1});
+  int WilsonFermionStatic::HandOptDslash;
 
-int WilsonFermion::HandOptDslash;
+  /////////////////////////////////
+  // Constructor and gauge import
+  /////////////////////////////////
 
-WilsonFermion::WilsonFermion(LatticeGaugeField &_Umu,
-			     GridCartesian         &Fgrid,
-			     GridRedBlackCartesian &Hgrid, 
-			     RealD _mass) :
-  _grid(&Fgrid),
-  _cbgrid(&Hgrid),
-  Stencil    (&Fgrid,npoint,Even,directions,displacements),
-  StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even
-  StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd
-  mass(_mass),
-  Umu(&Fgrid),
-  UmuEven(&Hgrid),
-  UmuOdd (&Hgrid)
-{
-  // Allocate the required comms buffer
-  comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO
-  ImportGauge(_Umu);
-}
-void WilsonFermion::ImportGauge(const LatticeGaugeField &_Umu)
-{
-  DoubleStore(Umu,_Umu);
-  pickCheckerboard(Even,UmuEven,Umu);
-  pickCheckerboard(Odd ,UmuOdd,Umu);
-}
-      
-void WilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
-{
-  conformable(Uds._grid,GaugeGrid());
-  conformable(Umu._grid,GaugeGrid());
-  LatticeColourMatrix U(GaugeGrid());
-  for(int mu=0;mu<Nd;mu++){
-    U = PeekIndex<LorentzIndex>(Umu,mu);
-    PokeIndex<LorentzIndex>(Uds,U,mu);
-    U = adj(Cshift(U,mu,-1));
-    PokeIndex<LorentzIndex>(Uds,U,mu+4);
+  template<class Impl>
+  WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu,
+				     GridCartesian         &Fgrid,
+				     GridRedBlackCartesian &Hgrid, 
+				     RealD _mass) :
+        _grid(&Fgrid),
+	_cbgrid(&Hgrid),
+	Stencil    (&Fgrid,npoint,Even,directions,displacements),
+	StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even
+	StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd
+	mass(_mass),
+	Umu(&Fgrid),
+	UmuEven(&Hgrid),
+	UmuOdd (&Hgrid) 
+  {
+    // Allocate the required comms buffer
+    comm_buf.resize(Stencil._unified_buffer_size); // this is always big enough to contain EO
+    ImportGauge(_Umu);
   }
-}
 
-RealD WilsonFermion::M(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard=in.checkerboard;
-  Dhop(in,out,DaggerNo);
-  return axpy_norm(out,4+mass,in,out);
-}
-RealD WilsonFermion::Mdag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard=in.checkerboard;
-  Dhop(in,out,DaggerYes);
-  return axpy_norm(out,4+mass,in,out);
-}
-
-void WilsonFermion::Meooe(const LatticeFermion &in, LatticeFermion &out)
-{
-  if ( in.checkerboard == Odd ) {
-    DhopEO(in,out,DaggerNo);
-  } else {
-    DhopOE(in,out,DaggerNo);
+  template<class Impl>
+  void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
+  {
+    Impl::DoubleStore(GaugeGrid(),Umu,_Umu);
+    pickCheckerboard(Even,UmuEven,Umu);
+    pickCheckerboard(Odd ,UmuOdd,Umu);
   }
-}
-void WilsonFermion::MeooeDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  if ( in.checkerboard == Odd ) {
-    DhopEO(in,out,DaggerYes);
-  } else {
-    DhopOE(in,out,DaggerYes);
-  }
-}
-void WilsonFermion::Mooee(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  out = (4.0+mass)*in;
-  return ;
-}
-void WilsonFermion::MooeeDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  Mooee(in,out);
-}
-void WilsonFermion::MooeeInv(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  out = (1.0/(4.0+mass))*in;
-  return ;
-}
-void WilsonFermion::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out)
-{
-  out.checkerboard = in.checkerboard;
-  MooeeInv(in,out);
-}
-void WilsonFermion::Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp)
-{
-  DhopDir(in,out,dir,disp);
-}
-void WilsonFermion::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp){
-
-  WilsonCompressor compressor(DaggerNo);
-
-  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
   
-  assert( (disp==1)||(disp==-1) );
-
-  int skip = (disp==1) ? 0 : 1;
-
-  int dirdisp = dir+skip*4;
-
-PARALLEL_FOR_LOOP
-  for(int sss=0;sss<in._grid->oSites();sss++){
-    DiracOptDhopDir(Stencil,Umu,comm_buf,sss,sss,in,out,dirdisp,dirdisp);
-  }
- 
-};
-void WilsonFermion::DhopDirDisp(const LatticeFermion &in, LatticeFermion &out,int dirdisp,int gamma,int dag)
-{
-
-  WilsonCompressor compressor(dag);
-
-  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
-
-PARALLEL_FOR_LOOP
-  for(int sss=0;sss<in._grid->oSites();sss++){
-    DiracOptDhopDir(Stencil,Umu,comm_buf,sss,sss,in,out,dirdisp,gamma);
-  }
- 
-};
-
-void WilsonFermion::DhopInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
-				const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  assert((dag==DaggerNo) ||(dag==DaggerYes));
-  WilsonCompressor compressor(dag);
-  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
-
-  if ( dag == DaggerYes ) {
-    if( HandOptDslash ) {
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOptHandDhopSiteDag(st,U,comm_buf,sss,sss,in,out);
-      }
-    } else { 
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOptDhopSiteDag(st,U,comm_buf,sss,sss,in,out);
-      }
-    }
-  } else {
-    if( HandOptDslash ) {
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOptHandDhopSite(st,U,comm_buf,sss,sss,in,out);
-      }
-    } else { 
-PARALLEL_FOR_LOOP
-      for(int sss=0;sss<in._grid->oSites();sss++){
-        DiracOptDhopSite(st,U,comm_buf,sss,sss,in,out);
-      }
-    }
-  }
-}
-void WilsonFermion::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  conformable(in._grid,_cbgrid);    // verifies half grid
-  conformable(in._grid,out._grid); // drops the cb check
-
-  assert(in.checkerboard==Even);
-  out.checkerboard = Odd;
-
-  DhopInternal(StencilEven,UmuOdd,in,out,dag);
-}
-void WilsonFermion::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  conformable(in._grid,_cbgrid);    // verifies half grid
-  conformable(in._grid,out._grid); // drops the cb check
-
-  assert(in.checkerboard==Odd);
-  out.checkerboard = Even;
-
-  DhopInternal(StencilOdd,UmuEven,in,out,dag);
-}
-void WilsonFermion::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
-{
-  conformable(in._grid,_grid); // verifies full grid
-  conformable(in._grid,out._grid);
-
-  out.checkerboard = in.checkerboard;
-
-  DhopInternal(Stencil,Umu,in,out,dag);
-}
-
-void WilsonFermion::DerivInternal(CartesianStencil & st,LatticeDoubledGaugeField & U,
-				  LatticeGaugeField &mat,const LatticeFermion &A,const LatticeFermion &B,int dag)
-{
-  assert((dag==DaggerNo) ||(dag==DaggerYes));
-
-  WilsonCompressor compressor(dag);
-
-  LatticeColourMatrix tmp(B._grid);
-  LatticeFermion Btilde(B._grid);
-
-  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(B,comm_buf,compressor);
-  
-  for(int mu=0;mu<Nd;mu++){
+  /////////////////////////////
+  // Implement the interface
+  /////////////////////////////
       
-    ////////////////////////////////////////////////////////////////////////
-    // Flip gamma (1+g)<->(1-g) if dag
-    ////////////////////////////////////////////////////////////////////////
-    int gamma = mu;
-    if ( dag ) gamma+= Nd;
-
-    ////////////////////////
-    // Call the single hop
-    ////////////////////////
-PARALLEL_FOR_LOOP
-    for(int sss=0;sss<B._grid->oSites();sss++){
-      DiracOptDhopDir(st,U,comm_buf,sss,sss,B,Btilde,mu,gamma);
-    }
-
-    //////////////////////////////////////////////////
-    // spin trace outer product
-    //////////////////////////////////////////////////
-    tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); 
-    PokeIndex<LorentzIndex>(mat,tmp,mu);
-
+  template<class Impl>
+  RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) 
+  {
+    out.checkerboard=in.checkerboard;
+    Dhop(in,out,DaggerNo);
+    return axpy_norm(out,4+mass,in,out);
   }
-}
-void WilsonFermion::DhopDeriv(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
-{
-  conformable(U._grid,_grid);  
-  conformable(U._grid,V._grid);
-  conformable(U._grid,mat._grid);
 
-  mat.checkerboard = U.checkerboard;
+  template<class Impl>
+  RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) 
+  {
+    out.checkerboard=in.checkerboard;
+    Dhop(in,out,DaggerYes);
+    return axpy_norm(out,4+mass,in,out);
+  }
 
-  DerivInternal(Stencil,Umu,mat,U,V,dag);
-}
-void WilsonFermion::DhopDerivOE(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
-{
-  conformable(U._grid,_cbgrid);  
-  conformable(U._grid,V._grid);
-  conformable(U._grid,mat._grid);
+  template<class Impl>
+  void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) 
+  {
+    if ( in.checkerboard == Odd ) {
+      DhopEO(in,out,DaggerNo);
+    } else {
+      DhopOE(in,out,DaggerNo);
+    }
+  }
+  template<class Impl>
+  void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) 
+  {
+    if ( in.checkerboard == Odd ) {
+      DhopEO(in,out,DaggerYes);
+    } else {
+      DhopOE(in,out,DaggerYes);
+    }
+  }
 
-  assert(V.checkerboard==Even);
-  assert(U.checkerboard==Odd);
-  mat.checkerboard = Odd;
+  template<class Impl>
+  void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
+    out.checkerboard = in.checkerboard;
+    out = (4.0+mass)*in;
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
+    out.checkerboard = in.checkerboard;
+    Mooee(in,out);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
+    out.checkerboard = in.checkerboard;
+    out = (1.0/(4.0+mass))*in;
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
+    out.checkerboard = in.checkerboard;
+    MooeeInv(in,out);
+  }
+  
+  ///////////////////////////////////
+  // Internal
+  ///////////////////////////////////
 
-  DerivInternal(StencilEven,UmuOdd,mat,U,V,dag);
-}
-void WilsonFermion::DhopDerivEO(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag)
-{
-  conformable(U._grid,_cbgrid);  
-  conformable(U._grid,V._grid);
-  conformable(U._grid,mat._grid);
+  template<class Impl>
+  void WilsonFermion<Impl>::DerivInternal(CartesianStencil & st,
+					  DoubledGaugeField & U,
+					  GaugeField &mat,
+					  const FermionField &A,
+					  const FermionField &B,int dag) {
+	
+    assert((dag==DaggerNo) ||(dag==DaggerYes));
+    
+    Compressor compressor(dag);
+    
+    GaugeLinkField tmp(B._grid);
+    FermionField Btilde(B._grid);
+    
+    st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(B,comm_buf,compressor);
+    
+    for(int mu=0;mu<Nd;mu++){
+      
+      ////////////////////////////////////////////////////////////////////////
+      // Flip gamma (1+g)<->(1-g) if dag
+      ////////////////////////////////////////////////////////////////////////
+      int gamma = mu;
+      if ( dag ) gamma+= Nd;
+      
+      ////////////////////////
+      // Call the single hop
+      ////////////////////////
+PARALLEL_FOR_LOOP
+	for(int sss=0;sss<B._grid->oSites();sss++){
+	  Kernels::DiracOptDhopDir(st,U,comm_buf,sss,sss,B,Btilde,mu,gamma);
+	}
+      
+      //////////////////////////////////////////////////
+      // spin trace outer product
+      //////////////////////////////////////////////////
+      tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); 
+      PokeIndex<LorentzIndex>(mat,tmp,mu);
+      
+    }
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
+  {
+    conformable(U._grid,_grid);  
+    conformable(U._grid,V._grid);
+    conformable(U._grid,mat._grid);
+    
+    mat.checkerboard = U.checkerboard;
+    
+    DerivInternal(Stencil,Umu,mat,U,V,dag);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
+  {
+    conformable(U._grid,_cbgrid);  
+    conformable(U._grid,V._grid);
+    conformable(U._grid,mat._grid);
+    
+    assert(V.checkerboard==Even);
+    assert(U.checkerboard==Odd);
+    mat.checkerboard = Odd;
+    
+    DerivInternal(StencilEven,UmuOdd,mat,U,V,dag);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
+  {
+    conformable(U._grid,_cbgrid);  
+    conformable(U._grid,V._grid);
+    conformable(U._grid,mat._grid);
+	
+    assert(V.checkerboard==Odd);
+    assert(U.checkerboard==Even);
+    mat.checkerboard = Even;
+	
+    DerivInternal(StencilOdd,UmuEven,mat,U,V,dag);
+  }
+  
 
-  assert(V.checkerboard==Odd);
-  assert(U.checkerboard==Even);
-  mat.checkerboard = Even;
+  template<class Impl>
+  void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) {
+    conformable(in._grid,_grid); // verifies full grid
+    conformable(in._grid,out._grid);
+    
+    out.checkerboard = in.checkerboard;
+    
+    DhopInternal(Stencil,Umu,in,out,dag);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) {
+    conformable(in._grid,_cbgrid);    // verifies half grid
+    conformable(in._grid,out._grid); // drops the cb check
+    
+    assert(in.checkerboard==Even);
+    out.checkerboard = Odd;
+    
+    DhopInternal(StencilEven,UmuOdd,in,out,dag);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
+    conformable(in._grid,_cbgrid);    // verifies half grid
+    conformable(in._grid,out._grid); // drops the cb check
+    
+    assert(in.checkerboard==Odd);
+    out.checkerboard = Even;
+    
+    DhopInternal(StencilOdd,UmuEven,in,out,dag);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::Mdir (const FermionField &in, FermionField &out,int dir,int disp) {
+    DhopDir(in,out,dir,disp);
+  }
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir,int disp){
+    
+    int skip = (disp==1) ? 0 : 1;
+    int dirdisp = dir+skip*4;
+    
+    DhopDirDisp(in,out,dirdisp,dirdisp,DaggerNo);
+    
+  };
+  
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) {
+    
+    Compressor compressor(dag);
+    
+    Stencil.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor);
+    
+PARALLEL_FOR_LOOP
+      for(int sss=0;sss<in._grid->oSites();sss++){
+	Kernels::DiracOptDhopDir(Stencil,Umu,comm_buf,sss,sss,in,out,dirdisp,gamma);
+      }
+    
+  };
 
-  DerivInternal(StencilOdd,UmuEven,mat,U,V,dag);
-}
+
+  template<class Impl>
+  void WilsonFermion<Impl>::DhopInternal(CartesianStencil & st,DoubledGaugeField & U,
+					 const FermionField &in, FermionField &out,int dag) {
+
+    assert((dag==DaggerNo) ||(dag==DaggerYes));
+
+    Compressor compressor(dag);
+    st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor);
+    
+    if ( dag == DaggerYes ) {
+      if( HandOptDslash ) {
+PARALLEL_FOR_LOOP
+        for(int sss=0;sss<in._grid->oSites();sss++){
+	  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sss,sss,in,out);
+	}
+      } else { 
+PARALLEL_FOR_LOOP
+        for(int sss=0;sss<in._grid->oSites();sss++){
+	  Kernels::DiracOptDhopSiteDag(st,U,comm_buf,sss,sss,in,out);
+	}
+      }
+    } else {
+      if( HandOptDslash ) {
+PARALLEL_FOR_LOOP
+        for(int sss=0;sss<in._grid->oSites();sss++){
+	  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sss,sss,in,out);
+	}
+      } else { 
+PARALLEL_FOR_LOOP
+        for(int sss=0;sss<in._grid->oSites();sss++){
+	  Kernels::DiracOptDhopSite(st,U,comm_buf,sss,sss,in,out);
+	}
+      }
+    }
+  };
+ 
+  FermOpTemplateInstantiate(WilsonFermion);
 
 }}
 
diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h
index 79d3e626..1dc71c00 100644
--- a/lib/qcd/action/fermion/WilsonFermion.h
+++ b/lib/qcd/action/fermion/WilsonFermion.h
@@ -5,8 +5,20 @@ namespace Grid {
 
   namespace QCD {
 
-    class WilsonFermion : public FermionOperator<LatticeFermion,LatticeGaugeField>
+    class WilsonFermionStatic {
+    public:
+      static int HandOptDslash; // these are a temporary hack
+      static int MortonOrder;
+      static const std::vector<int> directions   ;
+      static const std::vector<int> displacements;
+      static const int npoint=8;
+    };
+
+    template<class Impl>
+    class WilsonFermion : public FermionOperator<Impl>, public WilsonFermionStatic
     {
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+
     public:
 
       ///////////////////////////////////////////////////////////////
@@ -17,133 +29,73 @@ namespace Grid {
       GridBase *FermionGrid(void)            { return _grid;}
       GridBase *FermionRedBlackGrid(void)    { return _cbgrid;}
 
-      // override multiply
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out);
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out);
+      //////////////////////////////////////////////////////////////////
+      // override multiply; cut number routines if pass dagger argument
+      // and also make interface more uniformly consistent
+      //////////////////////////////////////////////////////////////////
+      RealD M(const FermionField &in, FermionField &out);
+      RealD Mdag(const FermionField &in, FermionField &out);
 
-      // half checkerboard operaions
-      void   Meooe       (const LatticeFermion &in, LatticeFermion &out);
-      void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out);
-
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out); // remain virtual so we 
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out); // can derive Clover
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out); // from Wilson base
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out);
+      /////////////////////////////////////////////////////////
+      // half checkerboard operations
+      // could remain virtual so we  can derive Clover from Wilson base
+      /////////////////////////////////////////////////////////
+      void Meooe(const FermionField &in, FermionField &out) ;
+      void MeooeDag(const FermionField &in, FermionField &out) ;
+      void Mooee(const FermionField &in, FermionField &out) ;
+      void MooeeDag(const FermionField &in, FermionField &out) ;
+      void MooeeInv(const FermionField &in, FermionField &out) ;
+      void MooeeInvDag(const FermionField &in, FermionField &out) ;
 
       ////////////////////////
-      // 
-      // Force term: d/dtau S = 0
-      //
-      // It is simplest to consider the two flavour force term 
-      // 
-      //       S[U,phi] = phidag (MdagM)^-1 phi
-      //
-      // But simplify even this to
-      //
-      //       S[U,phi] = phidag MdagM phi
-      //
-      // (other options exist depending on nature of action fragment.)
-      // 
-      // Require momentum be traceless anti-hermitian to move within group manifold [ P = i P^a T^a ]
-      //
-      // Define the HMC hamiltonian
-      //
-      //       H = 1/2 Tr P^2 + S(U,phi)
-      // 
-      // .
-      // U =  P U    (lorentz & color indices multiplied)
-      //
-      // Hence
-      //
-      // .c    c  c       c
-      // U =  U  P   = - U  P        (c == dagger)
-      //
-      // So, taking some liberty with implicit indices
-      //                  .      .          .c      c
-      // dH/dt = 0 = Tr P P +Tr[ U  dS/dU + U  dS/dU  ]
-      //
-      //                .                          c      c
-      //           = Tr P P + i Tr[  P U dS/dU  - U   P dS/dU  ]
-      //
-      //                   .                        c  c
-      //           = Tr P (P + i ( U dS/dU - P dS/dU  U ]
-      //
-      //              .                        c  c
-      //           => P  = -i [ U dS/dU - dS/dU  U ]      generates HMC EoM
-      //
-      // Simple case work this out using S = phi^dag MdagM phi for wilson:
-      //                               c       c
-      // dSdt     = dU_xdt  dSdUx  + dUxdt dSdUx
-      //          
-      //         = Tr i P U_x [ (\phi^\dag)_x (1+g) (M \phi)_x+\mu   +(\phi^\dag M^\dag)_x (1-g) \phi_{x+\mu} ]
-      //                 c
-      //            - i U_x P [ (\phi^\dag)_x+mu (1-g) (M \phi)_x    +(\phi^\dag M^\dag)_(x+\mu) (1+g) \phi_{x} ]
-      //
-      //         = i [(\phi^\dag)_x      ]_j P_jk  [U_x(1+g) (M \phi)_x+\mu]_k        (1)
-      //         + i [(\phi^\dagM^\dag)_x]_j P_jk  [U_x(1-g) (\phi)_x+\mu]_k          (2)
-      //         - i [(\phi^\dag)_x+mu (1-g) U^dag_x]_j P_jk  [(M \phi)_xk            (3)
-      //         - i [(\phi^\dagM^\dag)_x+mu (1+g) U^dag_x]_j P_jk  [ \phi]_xk        (4)
-      //
-      // Observe that (1)* = (4)
-      //              (2)* = (3)
-      //
-      // Write as    .
-      //             P_{kj}  = - i (  [U_x(1+g) (M \phi)_x+\mu] (x) [(\phi^\dag)_x] + [U_x(1-g) (\phi)_x+\mu] (x) [(\phi^\dagM^\dag)_x] - h.c )
-      //
-      // where (x) denotes outer product in colour and spins are traced.
-      //
-      // Need only evaluate (1) and (2)  [Chroma] or (2) and (4) [IroIro] and take the 
-      // traceless anti hermitian part (of term in brackets w/o the "i")
-      // 
-      // Generalisation to S=phi^dag (MdagM)^{-1} phi is simple:
-      //
-      // For more complicated DWF etc... apply product rule in differentiation
-      //
+      // Derivative interface
       ////////////////////////
-      void DhopDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      void DhopDerivEO(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      void DhopDerivOE(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-
-      // Extra support internal
-      void DerivInternal(CartesianStencil & st,
-			 LatticeDoubledGaugeField & U,
-			 LatticeGaugeField &mat,
-			 const LatticeFermion &A,
-			 const LatticeFermion &B,
-			 int dag);
+      // Interface calls an internal routine
+      void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 
 
+      ///////////////////////////////////////////////////////////////
       // non-hermitian hopping term; half cb or both
-      void Dhop  (const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
+      ///////////////////////////////////////////////////////////////
+      void Dhop(const FermionField &in, FermionField &out,int dag) ;
+      void DhopOE(const FermionField &in, FermionField &out,int dag) ;
+      void DhopEO(const FermionField &in, FermionField &out,int dag) ;
 
-      // Multigrid assistance
-      void   Mdir (const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
-      void DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
-      void DhopDirDisp(const LatticeFermion &in, LatticeFermion &out,int dirdisp,int gamma,int dag);
+      ///////////////////////////////////////////////////////////////
+      // Multigrid assistance; force term uses too
+      ///////////////////////////////////////////////////////////////
+      void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ;
+      void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
+      void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ;
 
       ///////////////////////////////////////////////////////////////
       // Extra methods added by derived
       ///////////////////////////////////////////////////////////////
-      void DhopInternal(CartesianStencil & st,
-			LatticeDoubledGaugeField &U,
-			const LatticeFermion &in, 
-			LatticeFermion &out,
-			int dag);
+      void DerivInternal(CartesianStencil & st,
+			 DoubledGaugeField & U,
+			 GaugeField &mat,
+			 const FermionField &A,
+			 const FermionField &B,
+			 int dag);
+
+      void DhopInternal(CartesianStencil & st,DoubledGaugeField & U,
+			const FermionField &in, FermionField &out,int dag) ;
+
 
       // Constructor
-      WilsonFermion(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,RealD _mass);
+      WilsonFermion(GaugeField &_Umu,
+		    GridCartesian         &Fgrid,
+		    GridRedBlackCartesian &Hgrid, 
+		    RealD _mass) ;
 
-      // DoubleStore
-      virtual void ImportGauge(const LatticeGaugeField &_Umu);
-      void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
+      // DoubleStore impl dependent
+      void ImportGauge(const GaugeField &_Umu);
 
       ///////////////////////////////////////////////////////////////
       // Data members require to support the functionality
       ///////////////////////////////////////////////////////////////
-      static int HandOptDslash; // these are a temporary hack
-      static int MortonOrder;
 
       //    protected:
     public:
@@ -153,26 +105,24 @@ namespace Grid {
       GridBase                     *    _grid; 
       GridBase                     *  _cbgrid;
 
-      static const int npoint=8;
-      static const std::vector<int> directions   ;
-      static const std::vector<int> displacements;
-
       //Defines the stencils for even and odd
       CartesianStencil Stencil; 
       CartesianStencil StencilEven; 
       CartesianStencil StencilOdd; 
 
       // Copy of the gauge field , with even and odd subsets
-      LatticeDoubledGaugeField Umu;
-      LatticeDoubledGaugeField UmuEven;
-      LatticeDoubledGaugeField UmuOdd;
+      DoubledGaugeField Umu;
+      DoubledGaugeField UmuEven;
+      DoubledGaugeField UmuOdd;
 
       // Comms buffer
-      std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  comm_buf;
-
+      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
       
     };
 
+    typedef WilsonFermion<WilsonImplF> WilsonFermionF;
+    typedef WilsonFermion<WilsonImplD> WilsonFermionD;
+
   }
 }
 #endif
diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc
index cbe66bb5..f324db8d 100644
--- a/lib/qcd/action/fermion/WilsonFermion5D.cc
+++ b/lib/qcd/action/fermion/WilsonFermion5D.cc
@@ -3,19 +3,19 @@
 namespace Grid {
 namespace QCD {
   
-  // S-direction is INNERMOST and takes no part in the parity.
-  const std::vector<int> WilsonFermion5D::directions   ({1,2,3,4, 1, 2, 3, 4});
-  const std::vector<int> WilsonFermion5D::displacements({1,1,1,1,-1,-1,-1,-1});
-
-  int WilsonFermion5D::HandOptDslash;
+// S-direction is INNERMOST and takes no part in the parity.
+const std::vector<int> WilsonFermion5DStatic::directions   ({1,2,3,4, 1, 2, 3, 4});
+const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
+int WilsonFermion5DStatic::HandOptDslash;
 
   // 5d lattice for DWF.
-  WilsonFermion5D::WilsonFermion5D(LatticeGaugeField &_Umu,
-					   GridCartesian         &FiveDimGrid,
-					   GridRedBlackCartesian &FiveDimRedBlackGrid,
-					   GridCartesian         &FourDimGrid,
-					   GridRedBlackCartesian &FourDimRedBlackGrid,
-					   RealD _M5) :
+template<class Impl>
+WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
+				       GridCartesian         &FiveDimGrid,
+				       GridRedBlackCartesian &FiveDimRedBlackGrid,
+				       GridCartesian         &FourDimGrid,
+				       GridRedBlackCartesian &FourDimRedBlackGrid,
+				       RealD _M5) :
   _FiveDimGrid(&FiveDimGrid),
   _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
   _FourDimGrid(&FourDimGrid),
@@ -68,34 +68,23 @@ namespace QCD {
 
   ImportGauge(_Umu);
 }  
-void WilsonFermion5D::ImportGauge(const LatticeGaugeField &_Umu)
+template<class Impl>
+void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
 {
-  DoubleStore(Umu,_Umu);
+  Impl::DoubleStore(GaugeGrid(),Umu,_Umu);
   pickCheckerboard(Even,UmuEven,Umu);
   pickCheckerboard(Odd ,UmuOdd,Umu);
 }
-void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu)
-{
-  assert(GaugeGrid()->_ndimension==4);
-  conformable(Uds._grid,GaugeGrid());
-  conformable(Umu._grid,GaugeGrid());
-  LatticeColourMatrix U(GaugeGrid());
-  for(int mu=0;mu<Nd;mu++){
-    U = PeekIndex<LorentzIndex>(Umu,mu);
-    PokeIndex<LorentzIndex>(Uds,U,mu);
-    U = adj(Cshift(U,mu,-1));
-    PokeIndex<LorentzIndex>(Uds,U,mu+4);
-  }
-}
-void WilsonFermion5D::DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir5,int disp)
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
 {
   int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
                     // we drop off the innermost fifth dimension
   //  assert( (disp==1)||(disp==-1) );
   //  assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
 
-  WilsonCompressor compressor(DaggerNo);
-  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
+  Compressor compressor(DaggerNo);
+  Stencil.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor);
   
   int skip = (disp==1) ? 0 : 1;
 
@@ -109,16 +98,17 @@ PARALLEL_FOR_LOOP
     for(int s=0;s<Ls;s++){
       int sU=ss;
       int sF = s+Ls*sU; 
-      DiracOptDhopDir(Stencil,Umu,comm_buf,sF,sU,in,out,dirdisp,dirdisp);
+      Kernels::DiracOptDhopDir(Stencil,Umu,comm_buf,sF,sU,in,out,dirdisp,dirdisp);
     }
   }
 };
 
-void WilsonFermion5D::DerivInternal(CartesianStencil & st,
-				    LatticeDoubledGaugeField & U,
-				    LatticeGaugeField &mat,
-				    const LatticeFermion &A,
-				    const LatticeFermion &B,
+template<class Impl>
+void WilsonFermion5D<Impl>::DerivInternal(CartesianStencil & st,
+				    DoubledGaugeField & U,
+				    GaugeField &mat,
+				    const FermionField &A,
+				    const FermionField &B,
 				    int dag)
 {
   assert((dag==DaggerNo) ||(dag==DaggerYes));
@@ -126,13 +116,13 @@ void WilsonFermion5D::DerivInternal(CartesianStencil & st,
   conformable(st._grid,A._grid);
   conformable(st._grid,B._grid);
 
-  WilsonCompressor compressor(dag);
+  Compressor compressor(dag);
   
-  LatticeColourMatrix tmp(mat._grid);
-  LatticeFermion Btilde(B._grid);
-  LatticeFermion Atilde(B._grid);
+  GaugeLinkField tmp(mat._grid);
+  FermionField Btilde(B._grid);
+  FermionField Atilde(B._grid);
 
-  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(B,comm_buf,compressor);
+  st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(B,comm_buf,compressor);
 
   Atilde=A;
 
@@ -158,7 +148,7 @@ PARALLEL_FOR_LOOP
 	assert ( sF< B._grid->oSites());
 	assert ( sU< U._grid->oSites());
 
-	DiracOptDhopDir(st,U,comm_buf,sF,sU,B,Btilde,mu,gamma);
+	Kernels::DiracOptDhopDir(st,U,comm_buf,sF,sU,B,Btilde,mu,gamma);
 
     ////////////////////////////
     // spin trace outer product
@@ -176,10 +166,11 @@ PARALLEL_FOR_LOOP
   }
 }
 
-void WilsonFermion5D::DhopDeriv(      LatticeGaugeField &mat,
-				const LatticeFermion &A,
-				const LatticeFermion &B,
-				int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopDeriv(      GaugeField &mat,
+					    const FermionField &A,
+					    const FermionField &B,
+					    int dag)
 {
   conformable(A._grid,FermionGrid());  
   conformable(A._grid,B._grid);
@@ -190,10 +181,11 @@ void WilsonFermion5D::DhopDeriv(      LatticeGaugeField &mat,
   DerivInternal(Stencil,Umu,mat,A,B,dag);
 }
 
-void WilsonFermion5D::DhopDerivEO(LatticeGaugeField &mat,
-				  const LatticeFermion &A,
-				  const LatticeFermion &B,
-				  int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
+					const FermionField &A,
+					const FermionField &B,
+					int dag)
 {
   conformable(A._grid,FermionRedBlackGrid());
   conformable(GaugeRedBlackGrid(),mat._grid);
@@ -206,9 +198,10 @@ void WilsonFermion5D::DhopDerivEO(LatticeGaugeField &mat,
   DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
 }
 
-void WilsonFermion5D::DhopDerivOE(LatticeGaugeField &mat,
-				  const LatticeFermion &A,
-				  const LatticeFermion &B,
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
+				  const FermionField &A,
+				  const FermionField &B,
 				  int dag)
 {
   conformable(A._grid,FermionRedBlackGrid());
@@ -222,15 +215,16 @@ void WilsonFermion5D::DhopDerivOE(LatticeGaugeField &mat,
   DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
 }
 
-void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
-				   LatticeDoubledGaugeField & U,
-				   const LatticeFermion &in, LatticeFermion &out,int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
+					 DoubledGaugeField & U,
+					 const FermionField &in, FermionField &out,int dag)
 {
   //  assert((dag==DaggerNo) ||(dag==DaggerYes));
 
-  WilsonCompressor compressor(dag);
+  Compressor compressor(dag);
 
-  st.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
+  st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(in,comm_buf,compressor);
   
   // Dhop takes the 4d grid from U, and makes a 5d index for fermion
   // Not loop ordering and data layout.
@@ -238,13 +232,13 @@ void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo,
   // - per thread reuse in L1 cache for U
   // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
   if ( dag == DaggerYes ) {
-    if( HandOptDslash ) {
+    if( this->HandOptDslash ) {
 PARALLEL_FOR_LOOP
       for(int ss=0;ss<U._grid->oSites();ss++){
 	for(int s=0;s<Ls;s++){
 	  int sU=ss;
 	  int sF = s+Ls*sU;
-	  DiracOptHandDhopSiteDag(st,U,comm_buf,sF,sU,in,out);
+	  Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 	  }
       }
     } else { 
@@ -255,20 +249,20 @@ PARALLEL_FOR_LOOP
 	  for(sd=0;sd<Ls;sd++){
 	    int sU=ss;
 	    int sF = sd+Ls*sU;
-	    DiracOptDhopSiteDag(st,U,comm_buf,sF,sU,in,out);
+	    Kernels::DiracOptDhopSiteDag(st,U,comm_buf,sF,sU,in,out);
 	  }
 	}
       }
     }
   } else {
-    if( HandOptDslash ) {
+    if( this->HandOptDslash ) {
 PARALLEL_FOR_LOOP
       for(int ss=0;ss<U._grid->oSites();ss++){
 	for(int s=0;s<Ls;s++){
 	  //	  int sU=lo.Reorder(ss);
 	  int sU=ss;
 	  int sF = s+Ls*sU;
-	  DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
+	  Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
 	}
       }
 
@@ -279,13 +273,14 @@ PARALLEL_FOR_LOOP
 	  //	  int sU=lo.Reorder(ss);
 	  int sU=ss;
 	  int sF = s+Ls*sU; 
-	  DiracOptDhopSite(st,U,comm_buf,sF,sU,in,out);
+	  Kernels::DiracOptDhopSite(st,U,comm_buf,sF,sU,in,out);
 	}
       }
     }
   }
 }
-void WilsonFermion5D::DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
 {
   conformable(in._grid,FermionRedBlackGrid());    // verifies half grid
   conformable(in._grid,out._grid); // drops the cb check
@@ -295,7 +290,8 @@ void WilsonFermion5D::DhopOE(const LatticeFermion &in, LatticeFermion &out,int d
 
   DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
 }
-void WilsonFermion5D::DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 {
   conformable(in._grid,FermionRedBlackGrid());    // verifies half grid
   conformable(in._grid,out._grid); // drops the cb check
@@ -305,7 +301,8 @@ void WilsonFermion5D::DhopEO(const LatticeFermion &in, LatticeFermion &out,int d
 
   DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
 }
-void WilsonFermion5D::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
 {
   conformable(in._grid,FermionGrid()); // verifies full grid
   conformable(in._grid,out._grid);
@@ -314,12 +311,16 @@ void WilsonFermion5D::Dhop(const LatticeFermion &in, LatticeFermion &out,int dag
 
   DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
 }
-void WilsonFermion5D::DW(const LatticeFermion &in, LatticeFermion &out,int dag)
+template<class Impl>
+void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
 {
   out.checkerboard=in.checkerboard;
   Dhop(in,out,dag); // -0.5 is included
   axpy(out,4.0-M5,in,out);
 }
+
+FermOpTemplateInstantiate(WilsonFermion5D);
+
 }}
 
 
diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h
index d5cff3e6..5159d9e1 100644
--- a/lib/qcd/action/fermion/WilsonFermion5D.h
+++ b/lib/qcd/action/fermion/WilsonFermion5D.h
@@ -14,21 +14,21 @@ namespace Grid {
     // i.e. even even contains fifth dim hopping term.
     //
     // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
-    ////////////////////////////
-    //ContFrac:
-    //  Ls always odd. Rational poly deg is either Ls or Ls-1
-    //PartFrac 
-    //  Ls always odd. Rational poly deg is either Ls or Ls-1
-    //
-    //Cayley: Ls always even, Rational poly deg is Ls
-    // 
-    // Just set nrational as Ls. Forget about Ls-1 cases.
-    //
-    // Require odd Ls for cont and part frac
-    ////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////
-    class WilsonFermion5D : public FermionOperator<LatticeFermion,LatticeGaugeField>
+
+    class WilsonFermion5DStatic { 
+    public:
+      // S-direction is INNERMOST and takes no part in the parity.
+      static int HandOptDslash; // these are a temporary hack
+      static const std::vector<int> directions;
+      static const std::vector<int> displacements;
+      const int npoint = 8;
+    };
+
+    template<class Impl>
+    class WilsonFermion5D : public FermionOperator<Impl>, public WilsonFermion5DStatic
     {
+#include <qcd/action/fermion/FermionImplTypedefs.h>
     public:
       ///////////////////////////////////////////////////////////////
       // Implement the abstract base
@@ -39,69 +39,65 @@ namespace Grid {
       GridBase *FermionRedBlackGrid(void)    { return _FiveDimRedBlackGrid;}
 
       // full checkerboard operations; leave unimplemented as abstract for now
-      virtual RealD  M    (const LatticeFermion &in, LatticeFermion &out){assert(0); return 0.0;};
-      virtual RealD  Mdag (const LatticeFermion &in, LatticeFermion &out){assert(0); return 0.0;};
+      virtual RealD  M    (const FermionField &in, FermionField &out){assert(0); return 0.0;};
+      virtual RealD  Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
 
       // half checkerboard operations; leave unimplemented as abstract for now
-      virtual void   Meooe       (const LatticeFermion &in, LatticeFermion &out){assert(0);};
-      virtual void   Mooee       (const LatticeFermion &in, LatticeFermion &out){assert(0);};
-      virtual void   MooeeInv    (const LatticeFermion &in, LatticeFermion &out){assert(0);};
+      virtual void   Meooe       (const FermionField &in, FermionField &out){assert(0);};
+      virtual void   Mooee       (const FermionField &in, FermionField &out){assert(0);};
+      virtual void   MooeeInv    (const FermionField &in, FermionField &out){assert(0);};
 
-      virtual void   MeooeDag    (const LatticeFermion &in, LatticeFermion &out){assert(0);};
-      virtual void   MooeeDag    (const LatticeFermion &in, LatticeFermion &out){assert(0);};
-      virtual void   MooeeInvDag (const LatticeFermion &in, LatticeFermion &out){assert(0);};
+      virtual void   MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
+      virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
+      virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
 
-      // These can be overridden by fancy 5d chiral actions
-      virtual void DhopDeriv  (LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void DhopDerivEO(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
-      virtual void DhopDerivOE(LatticeGaugeField &mat,const LatticeFermion &U,const LatticeFermion &V,int dag);
+      // These can be overridden by fancy 5d chiral action
+      virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
+      virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 
       // Implement hopping term non-hermitian hopping term; half cb or both
       // Implement s-diagonal DW
-      void DW    (const LatticeFermion &in, LatticeFermion &out,int dag);
-      void Dhop  (const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag);
-      void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag);
+      void DW    (const FermionField &in, FermionField &out,int dag);
+      void Dhop  (const FermionField &in, FermionField &out,int dag);
+      void DhopOE(const FermionField &in, FermionField &out,int dag);
+      void DhopEO(const FermionField &in, FermionField &out,int dag);
 
       // add a DhopComm
       // -- suboptimal interface will presently trigger multiple comms.
-      void DhopDir(const LatticeFermion &in, LatticeFermion &out,int dir,int disp);
+      void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
 
       ///////////////////////////////////////////////////////////////
       // New methods added 
       ///////////////////////////////////////////////////////////////
-
       void DerivInternal(CartesianStencil & st,
-			 LatticeDoubledGaugeField & U,
-			 LatticeGaugeField &mat,
-			 const LatticeFermion &A,
-			 const LatticeFermion &B,
+			 DoubledGaugeField & U,
+			 GaugeField &mat,
+			 const FermionField &A,
+			 const FermionField &B,
 			 int dag);
 
       void DhopInternal(CartesianStencil & st,
 			LebesgueOrder &lo,
-			LatticeDoubledGaugeField &U,
-			const LatticeFermion &in, 
-			LatticeFermion &out,
+			DoubledGaugeField &U,
+			const FermionField &in, 
+			FermionField &out,
 			int dag);
 
       // Constructors
-      WilsonFermion5D(LatticeGaugeField &_Umu,
-			  GridCartesian         &FiveDimGrid,
-			  GridRedBlackCartesian &FiveDimRedBlackGrid,
-			  GridCartesian         &FourDimGrid,
-			  GridRedBlackCartesian &FourDimRedBlackGrid,
-			  double _M5);
+      WilsonFermion5D(GaugeField &_Umu,
+		      GridCartesian         &FiveDimGrid,
+		      GridRedBlackCartesian &FiveDimRedBlackGrid,
+		      GridCartesian         &FourDimGrid,
+		      GridRedBlackCartesian &FourDimRedBlackGrid,
+		      double _M5);
 
       // DoubleStore
-      virtual void ImportGauge(const LatticeGaugeField &_Umu);
-      void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
+      void ImportGauge(const GaugeField &_Umu);
 
       ///////////////////////////////////////////////////////////////
       // Data members require to support the functionality
       ///////////////////////////////////////////////////////////////
-      static int HandOptDslash; // these are a temporary hack
-
     protected:
 
       // Add these to the support from Wilson
@@ -110,10 +106,6 @@ namespace Grid {
       GridBase *_FiveDimGrid;
       GridBase *_FiveDimRedBlackGrid;
 
-      static const int npoint=8;
-      static const std::vector<int> directions   ;
-      static const std::vector<int> displacements;
-
       double                        M5;
       int Ls;
 
@@ -123,15 +115,15 @@ namespace Grid {
       CartesianStencil StencilOdd; 
 
       // Copy of the gauge field , with even and odd subsets
-      LatticeDoubledGaugeField Umu;
-      LatticeDoubledGaugeField UmuEven;
-      LatticeDoubledGaugeField UmuOdd;
+      DoubledGaugeField Umu;
+      DoubledGaugeField UmuEven;
+      DoubledGaugeField UmuOdd;
 
       LebesgueOrder Lebesgue;
       LebesgueOrder LebesgueEvenOdd;
 
       // Comms buffer
-      std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  comm_buf;
+      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  comm_buf;
       
     };
   }
diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc
index 2b8b7929..6d558e03 100644
--- a/lib/qcd/action/fermion/WilsonKernels.cc
+++ b/lib/qcd/action/fermion/WilsonKernels.cc
@@ -2,23 +2,317 @@
 namespace Grid {
 namespace QCD {
 
-void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			int sF,int sU,const LatticeFermion &in, LatticeFermion &out)
+template<class Impl> 
+void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeField &U,
+						  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+						  int sF,int sU,const FermionField &in, FermionField &out)
 {
-    vHalfSpinColourVector  tmp;    
-    vHalfSpinColourVector  chi;    
-    vSpinColourVector result;
-    vHalfSpinColourVector Uchi;
-    int offset,local,perm, ptype;
+  SiteHalfSpinor  tmp;    
+  SiteHalfSpinor  chi;    
+  SiteSpinor result;
+  SiteHalfSpinor Uchi;
+  int offset,local,perm, ptype;
 
-    // Xp
-    int ss = sF;
-    offset = st._offsets [Xp][ss];
-    local  = st._is_local[Xp][ss];
-    perm   = st._permute[Xp][ss];
+  // Xp
+  int ss = sF;
+  offset = st._offsets [Xp][ss];
+  local  = st._is_local[Xp][ss];
+  perm   = st._permute[Xp][ss];
+  
+  ptype  = st._permute_type[Xp];
+  if ( local && perm ) {
+    spProjXp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjXp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Xp);
+  spReconXp(result,Uchi);
+    
+  // Yp
+  offset = st._offsets [Yp][ss];
+  local  = st._is_local[Yp][ss];
+  perm   = st._permute[Yp][ss];
+  ptype  = st._permute_type[Yp];
+  if ( local && perm ) {
+    spProjYp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjYp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Yp);
+  accumReconYp(result,Uchi);
 
-    ptype  = st._permute_type[Xp];
+  // Zp
+  offset = st._offsets [Zp][ss];
+  local  = st._is_local[Zp][ss];
+  perm   = st._permute[Zp][ss];
+  ptype  = st._permute_type[Zp];
+  if ( local && perm ) {
+    spProjZp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjZp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Zp);
+  accumReconZp(result,Uchi);
+
+  // Tp
+  offset = st._offsets [Tp][ss];
+  local  = st._is_local[Tp][ss];
+  perm   = st._permute[Tp][ss];
+  ptype  = st._permute_type[Tp];
+  if ( local && perm ) {
+    spProjTp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjTp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Tp);
+  accumReconTp(result,Uchi);
+
+  // Xm
+  offset = st._offsets [Xm][ss];
+  local  = st._is_local[Xm][ss];
+  perm   = st._permute[Xm][ss];
+  ptype  = st._permute_type[Xm];
+  
+  if ( local && perm ) {
+    spProjXm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjXm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Xm);
+  accumReconXm(result,Uchi);
+  
+  // Ym
+  offset = st._offsets [Ym][ss];
+  local  = st._is_local[Ym][ss];
+  perm   = st._permute[Ym][ss];
+  ptype  = st._permute_type[Ym];
+  
+  if ( local && perm ) {
+    spProjYm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjYm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Ym);
+  accumReconYm(result,Uchi);
+  
+  // Zm
+  offset = st._offsets [Zm][ss];
+  local  = st._is_local[Zm][ss];
+  perm   = st._permute[Zm][ss];
+  ptype  = st._permute_type[Zm];
+  if ( local && perm ) {
+    spProjZm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjZm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Zm);
+  accumReconZm(result,Uchi);
+
+  // Tm
+  offset = st._offsets [Tm][ss];
+  local  = st._is_local[Tm][ss];
+  perm   = st._permute[Tm][ss];
+  ptype  = st._permute_type[Tm];
+  if ( local && perm ) {
+    spProjTm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjTm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Tm);
+  accumReconTm(result,Uchi);
+
+  vstream(out._odata[ss],result*(-0.5));
+};
+
+template<class Impl> 
+void WilsonKernels<Impl>::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
+					      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+					      int sF,int sU,const FermionField &in, FermionField &out)
+{
+  SiteHalfSpinor  tmp;    
+  SiteHalfSpinor  chi;    
+  SiteSpinor result;
+  SiteHalfSpinor Uchi;
+  int offset,local,perm, ptype;
+
+  // Xp
+  int ss=sF;
+  offset = st._offsets [Xm][ss];
+  local  = st._is_local[Xm][ss];
+  perm   = st._permute[Xm][ss];
+
+  ptype  = st._permute_type[Xm];
+  if ( local && perm ) {
+    spProjXp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjXp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Xm);
+  spReconXp(result,Uchi);
+
+  // Yp
+  offset = st._offsets [Ym][ss];
+  local  = st._is_local[Ym][ss];
+  perm   = st._permute[Ym][ss];
+  ptype  = st._permute_type[Ym];
+  if ( local && perm ) {
+    spProjYp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjYp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Ym);
+  accumReconYp(result,Uchi);
+  
+  // Zp
+  offset = st._offsets [Zm][ss];
+  local  = st._is_local[Zm][ss];
+  perm   = st._permute[Zm][ss];
+  ptype  = st._permute_type[Zm];
+  if ( local && perm ) {
+    spProjZp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjZp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Zm);
+  accumReconZp(result,Uchi);
+  
+  // Tp
+  offset = st._offsets [Tm][ss];
+  local  = st._is_local[Tm][ss];
+  perm   = st._permute[Tm][ss];
+  ptype  = st._permute_type[Tm];
+  if ( local && perm ) {
+    spProjTp(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjTp(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Tm);
+  accumReconTp(result,Uchi);
+  
+  // Xm
+  offset = st._offsets [Xp][ss];
+  local  = st._is_local[Xp][ss];
+  perm   = st._permute[Xp][ss];
+  ptype  = st._permute_type[Xp];
+
+  if ( local && perm ) {
+    spProjXm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjXm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Xp);
+  accumReconXm(result,Uchi);
+
+  // Ym
+  offset = st._offsets [Yp][ss];
+  local  = st._is_local[Yp][ss];
+  perm   = st._permute[Yp][ss];
+  ptype  = st._permute_type[Yp];
+
+  if ( local && perm ) {
+    spProjYm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjYm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Yp);
+  accumReconYm(result,Uchi);
+
+  // Zm
+  offset = st._offsets [Zp][ss];
+  local  = st._is_local[Zp][ss];
+  perm   = st._permute[Zp][ss];
+  ptype  = st._permute_type[Zp];
+  if ( local && perm ) {
+    spProjZm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjZm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Zp);
+  accumReconZm(result,Uchi);
+    
+  // Tm
+  offset = st._offsets [Tp][ss];
+  local  = st._is_local[Tp][ss];
+  perm   = st._permute[Tp][ss];
+  ptype  = st._permute_type[Tp];
+  if ( local && perm ) {
+    spProjTm(tmp,in._odata[offset]);
+    permute(chi,tmp,ptype);
+  } else if ( local ) {
+    spProjTm(chi,in._odata[offset]);
+  } else { 
+    chi=buf[offset];
+  }
+  Impl::multLink(Uchi,U._odata[sU],chi,Tp);
+  accumReconTm(result,Uchi);
+  
+  vstream(out._odata[ss],result*(-0.5));
+}
+
+template<class Impl> 
+void WilsonKernels<Impl>::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
+					  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+					  int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma)
+{
+  SiteHalfSpinor  tmp;    
+  SiteHalfSpinor  chi;    
+  SiteSpinor   result;
+  SiteHalfSpinor Uchi;
+  int offset,local,perm, ptype;
+  int ss=sF;
+
+  offset = st._offsets [dir][ss];
+  local  = st._is_local[dir][ss];
+  perm   = st._permute[dir][ss];
+  ptype  = st._permute_type[dir];
+
+  // Xp
+  if(gamma==Xp){
     if ( local && perm ) {
       spProjXp(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -27,14 +321,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Xp),&chi());
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
     spReconXp(result,Uchi);
+  }
 
-    // Yp
-    offset = st._offsets [Yp][ss];
-    local  = st._is_local[Yp][ss];
-    perm   = st._permute[Yp][ss];
-    ptype  = st._permute_type[Yp];
+  // Yp
+  if ( gamma==Yp ){
     if ( local && perm ) {
       spProjYp(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -43,14 +335,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Yp),&chi());
-    accumReconYp(result,Uchi);
-
-    // Zp
-    offset = st._offsets [Zp][ss];
-    local  = st._is_local[Zp][ss];
-    perm   = st._permute[Zp][ss];
-    ptype  = st._permute_type[Zp];
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconYp(result,Uchi);
+  }
+  
+  // Zp
+  if ( gamma ==Zp ){
     if ( local && perm ) {
       spProjZp(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -59,14 +349,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Zp),&chi());
-    accumReconZp(result,Uchi);
-
-    // Tp
-    offset = st._offsets [Tp][ss];
-    local  = st._is_local[Tp][ss];
-    perm   = st._permute[Tp][ss];
-    ptype  = st._permute_type[Tp];
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconZp(result,Uchi);
+  }
+  
+  // Tp
+  if ( gamma ==Tp ){
     if ( local && perm ) {
       spProjTp(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -75,15 +363,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Tp),&chi());
-    accumReconTp(result,Uchi);
-
-    // Xm
-    offset = st._offsets [Xm][ss];
-    local  = st._is_local[Xm][ss];
-    perm   = st._permute[Xm][ss];
-    ptype  = st._permute_type[Xm];
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconTp(result,Uchi);
+  }
 
+  // Xm
+  if ( gamma==Xm ){
     if ( local && perm ) {
       spProjXm(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -92,15 +377,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Xm),&chi());
-    accumReconXm(result,Uchi);
-
-    // Ym
-    offset = st._offsets [Ym][ss];
-    local  = st._is_local[Ym][ss];
-    perm   = st._permute[Ym][ss];
-    ptype  = st._permute_type[Ym];
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconXm(result,Uchi);
+  }
 
+  // Ym
+  if ( gamma == Ym ){
     if ( local && perm ) {
       spProjYm(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -109,14 +391,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Ym),&chi());
-    accumReconYm(result,Uchi);
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconYm(result,Uchi);
+  }
 
-    // Zm
-    offset = st._offsets [Zm][ss];
-    local  = st._is_local[Zm][ss];
-    perm   = st._permute[Zm][ss];
-    ptype  = st._permute_type[Zm];
+  // Zm
+  if ( gamma == Zm ){
     if ( local && perm ) {
       spProjZm(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -125,14 +405,12 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Zm),&chi());
-    accumReconZm(result,Uchi);
-
-    // Tm
-    offset = st._offsets [Tm][ss];
-    local  = st._is_local[Tm][ss];
-    perm   = st._permute[Tm][ss];
-    ptype  = st._permute_type[Tm];
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconZm(result,Uchi);
+  }
+  
+  // Tm
+  if ( gamma==Tm ) {
     if ( local && perm ) {
       spProjTm(tmp,in._odata[offset]);
       permute(chi,tmp,ptype);
@@ -141,287 +419,13 @@ void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     } else { 
       chi=buf[offset];
     }
-    mult(&Uchi(),&U._odata[sU](Tm),&chi());
-    accumReconTm(result,Uchi);
+    Impl::multLink(Uchi,U._odata[sU],chi,dir);
+    spReconTm(result,Uchi);
+  }
 
-    vstream(out._odata[ss],result*(-0.5));
+  vstream(out._odata[ss],result*(-0.5));
 }
 
-void DiracOptDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			   std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			   int sF,int sU,const LatticeFermion &in, LatticeFermion &out)
-{
-    vHalfSpinColourVector  tmp;    
-    vHalfSpinColourVector  chi;    
-    vSpinColourVector result;
-    vHalfSpinColourVector Uchi;
-    int offset,local,perm, ptype;
-
-    // Xp
-    int ss=sF;
-    offset = st._offsets [Xm][ss];
-    local  = st._is_local[Xm][ss];
-    perm   = st._permute[Xm][ss];
-
-    ptype  = st._permute_type[Xm];
-    if ( local && perm ) {
-      spProjXp(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjXp(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Xm),&chi());
-    spReconXp(result,Uchi);
-
-    // Yp
-    offset = st._offsets [Ym][ss];
-    local  = st._is_local[Ym][ss];
-    perm   = st._permute[Ym][ss];
-    ptype  = st._permute_type[Ym];
-    if ( local && perm ) {
-      spProjYp(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjYp(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Ym),&chi());
-    accumReconYp(result,Uchi);
-
-    // Zp
-    offset = st._offsets [Zm][ss];
-    local  = st._is_local[Zm][ss];
-    perm   = st._permute[Zm][ss];
-    ptype  = st._permute_type[Zm];
-    if ( local && perm ) {
-      spProjZp(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjZp(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Zm),&chi());
-    accumReconZp(result,Uchi);
-
-    // Tp
-    offset = st._offsets [Tm][ss];
-    local  = st._is_local[Tm][ss];
-    perm   = st._permute[Tm][ss];
-    ptype  = st._permute_type[Tm];
-    if ( local && perm ) {
-      spProjTp(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjTp(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Tm),&chi());
-    accumReconTp(result,Uchi);
-
-    // Xm
-    offset = st._offsets [Xp][ss];
-    local  = st._is_local[Xp][ss];
-    perm   = st._permute[Xp][ss];
-    ptype  = st._permute_type[Xp];
-
-    if ( local && perm ) 
-    {
-      spProjXm(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjXm(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Xp),&chi());
-    accumReconXm(result,Uchi);
-
-    // Ym
-    offset = st._offsets [Yp][ss];
-    local  = st._is_local[Yp][ss];
-    perm   = st._permute[Yp][ss];
-    ptype  = st._permute_type[Yp];
-
-    if ( local && perm ) {
-      spProjYm(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjYm(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Yp),&chi());
-    accumReconYm(result,Uchi);
-
-    // Zm
-    offset = st._offsets [Zp][ss];
-    local  = st._is_local[Zp][ss];
-    perm   = st._permute[Zp][ss];
-    ptype  = st._permute_type[Zp];
-    if ( local && perm ) {
-      spProjZm(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjZm(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Zp),&chi());
-    accumReconZm(result,Uchi);
-
-    // Tm
-    offset = st._offsets [Tp][ss];
-    local  = st._is_local[Tp][ss];
-    perm   = st._permute[Tp][ss];
-    ptype  = st._permute_type[Tp];
-    if ( local && perm ) {
-      spProjTm(tmp,in._odata[offset]);
-      permute(chi,tmp,ptype);
-    } else if ( local ) {
-      spProjTm(chi,in._odata[offset]);
-    } else { 
-      chi=buf[offset];
-    }
-    mult(&Uchi(),&U._odata[sU](Tp),&chi());
-    accumReconTm(result,Uchi);
-
-    vstream(out._odata[ss],result*(-0.5));
-}
-
-void DiracOptDhopDir(CartesianStencil &st,LatticeDoubledGaugeField &U,
-		     std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-		     int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dir,int gamma)
-{
-    vHalfSpinColourVector  tmp;    
-    vHalfSpinColourVector  chi;    
-    vSpinColourVector result;
-    vHalfSpinColourVector Uchi;
-    int offset,local,perm, ptype;
-    int ss=sF;
-
-    offset = st._offsets [dir][ss];
-    local  = st._is_local[dir][ss];
-    perm   = st._permute[dir][ss];
-    ptype  = st._permute_type[dir];
-
-    // Xp
-    if(gamma==Xp){
-      if ( local && perm ) {
-	spProjXp(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjXp(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconXp(result,Uchi);
-    }
-
-    // Yp
-    if ( gamma==Yp ){
-      if ( local && perm ) {
-	spProjYp(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjYp(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconYp(result,Uchi);
-    }
-
-    // Zp
-    if ( gamma ==Zp ){
-      if ( local && perm ) {
-	spProjZp(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjZp(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconZp(result,Uchi);
-    }
-
-    // Tp
-    if ( gamma ==Tp ){
-      if ( local && perm ) {
-	spProjTp(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjTp(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconTp(result,Uchi);
-    }
-
-    // Xm
-    if ( gamma==Xm ){
-      if ( local && perm ) {
-	spProjXm(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjXm(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconXm(result,Uchi);
-    }
-
-    // Ym
-    if ( gamma == Ym ){
-      if ( local && perm ) {
-	spProjYm(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjYm(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconYm(result,Uchi);
-    }
-
-    // Zm
-    if ( gamma == Zm ){
-      if ( local && perm ) {
-	spProjZm(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjZm(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconZm(result,Uchi);
-    }
-
-    // Tm
-    if ( gamma==Tm ) {
-      if ( local && perm ) {
-	spProjTm(tmp,in._odata[offset]);
-	permute(chi,tmp,ptype);
-      } else if ( local ) {
-	spProjTm(chi,in._odata[offset]);
-      } else { 
-	chi=buf[offset];
-      }
-      mult(&Uchi(),&U._odata[sU](dir),&chi());
-      spReconTm(result,Uchi);
-    }
-
-    vstream(out._odata[ss],result*(-0.5));
-}
+  FermOpTemplateInstantiate(WilsonKernels);
 
 }}
diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h
index cb0e07bb..eb0981fc 100644
--- a/lib/qcd/action/fermion/WilsonKernels.h
+++ b/lib/qcd/action/fermion/WilsonKernels.h
@@ -6,44 +6,46 @@ namespace Grid {
   namespace QCD {
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // Helper classes that implement Wilson stencil for a single site.
+    // Helper routines that implement Wilson stencil for a single site.
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
-    // Generic version works for any Nc and with extra flavour indices
-    //    namespace DiracOpt {
 
-      // These ones will need to be package intelligently. WilsonType base class
-      // for use by DWF etc..
-      void DiracOptDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			    int sF,int sU,const LatticeFermion &in, LatticeFermion &out);
-      void DiracOptDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			       std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			       int sF,int sU,const LatticeFermion &in, LatticeFermion &out);
-      void DiracOptDhopDir(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			   std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			   int sF,int sU,const LatticeFermion &in, LatticeFermion &out,int dirdisp,int gamma);
+    template<class Impl> 
+    class WilsonKernels { 
+    public:
+
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+
+    public:
+      static void 
+	DiracOptDhopSite(CartesianStencil &st,DoubledGaugeField &U,
+			 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+			 int sF,int sU,const FermionField &in, FermionField &out);
       
-      //  };
+      static void 
+	DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
+			    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+			    int sF,int sU,const FermionField &in,FermionField &out);
 
-      // Hand unrolled for Nc=3, one flavour
-      //    namespace DiracOptHand {
-      // These ones will need to be package intelligently. WilsonType base class
-      // for use by DWF etc..
+      static void 
+	DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
+			std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+			int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
 
-      void DiracOptHandDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-				std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-				int sF,int sU,const LatticeFermion &in, LatticeFermion &out);
-      void DiracOptHandDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-				   std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-				   int sF,int sU,const LatticeFermion &in, LatticeFermion &out);
+      static void 
+	DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
+			     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+			     int sF,int sU,const FermionField &in, FermionField &out){
+	DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
+      }
 
-      //    };
-  
+      static void 
+	DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
+				std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+				int sF,int sU,const FermionField &in, FermionField &out){
+	DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
+      }
 
-    void DiracOptHandDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-				 std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-				 int sF,int sU,const LatticeFermion &in, LatticeFermion &out);
+    };
 
   }
 }
diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc
index cff700fb..ea9d5457 100644
--- a/lib/qcd/action/fermion/WilsonKernelsHand.cc
+++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc
@@ -280,48 +280,50 @@
 namespace Grid {
 namespace QCD {
 
-void DiracOptHandDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			    std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			    int sF,int sU,const LatticeFermion &in, LatticeFermion &out)
+#if 0
+template<class Simd>
+void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
+					       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+					       int sF,int sU,const FermionField &in, FermionField &out)
 {
-  REGISTER vComplex result_00; // 12 regs on knc
-  REGISTER vComplex result_01;
-  REGISTER vComplex result_02;
+  REGISTER Simd result_00; // 12 regs on knc
+  REGISTER Simd result_01;
+  REGISTER Simd result_02;
   
-  REGISTER vComplex result_10;
-  REGISTER vComplex result_11;
-  REGISTER vComplex result_12;
+  REGISTER Simd result_10;
+  REGISTER Simd result_11;
+  REGISTER Simd result_12;
 
-  REGISTER vComplex result_20;
-  REGISTER vComplex result_21;
-  REGISTER vComplex result_22;
+  REGISTER Simd result_20;
+  REGISTER Simd result_21;
+  REGISTER Simd result_22;
 
-  REGISTER vComplex result_30;
-  REGISTER vComplex result_31;
-  REGISTER vComplex result_32; // 20 left
+  REGISTER Simd result_30;
+  REGISTER Simd result_31;
+  REGISTER Simd result_32; // 20 left
 
-  REGISTER vComplex Chi_00;    // two spinor; 6 regs
-  REGISTER vComplex Chi_01;
-  REGISTER vComplex Chi_02;
+  REGISTER Simd Chi_00;    // two spinor; 6 regs
+  REGISTER Simd Chi_01;
+  REGISTER Simd Chi_02;
 
-  REGISTER vComplex Chi_10;
-  REGISTER vComplex Chi_11;
-  REGISTER vComplex Chi_12;   // 14 left
+  REGISTER Simd Chi_10;
+  REGISTER Simd Chi_11;
+  REGISTER Simd Chi_12;   // 14 left
 
-  REGISTER vComplex UChi_00;  // two spinor; 6 regs
-  REGISTER vComplex UChi_01;
-  REGISTER vComplex UChi_02;
+  REGISTER Simd UChi_00;  // two spinor; 6 regs
+  REGISTER Simd UChi_01;
+  REGISTER Simd UChi_02;
 
-  REGISTER vComplex UChi_10;
-  REGISTER vComplex UChi_11;
-  REGISTER vComplex UChi_12;  // 8 left
+  REGISTER Simd UChi_10;
+  REGISTER Simd UChi_11;
+  REGISTER Simd UChi_12;  // 8 left
 
-  REGISTER vComplex U_00;  // two rows of U matrix
-  REGISTER vComplex U_10;
-  REGISTER vComplex U_20;  
-  REGISTER vComplex U_01;
-  REGISTER vComplex U_11;
-  REGISTER vComplex U_21;  // 2 reg left.
+  REGISTER Simd U_00;  // two rows of U matrix
+  REGISTER Simd U_10;
+  REGISTER Simd U_20;  
+  REGISTER Simd U_01;
+  REGISTER Simd U_11;
+  REGISTER Simd U_21;  // 2 reg left.
 
 #define Chimu_00 Chi_00
 #define Chimu_01 Chi_01
@@ -360,11 +362,6 @@ void DiracOptHandDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     MULT_2SPIN(Xp);
   }
   XP_RECON;
-  //  std::cout<<GridLogMessage << "XP_RECON"<<std::endl;
-  //  std::cout<<GridLogMessage << result_00 <<" "<<result_01 <<" "<<result_02 <<std::endl;
-  //  std::cout<<GridLogMessage << result_10 <<" "<<result_11 <<" "<<result_12 <<std::endl;
-  //  std::cout<<GridLogMessage << result_20 <<" "<<result_21 <<" "<<result_22 <<std::endl;
-  //  std::cout<<GridLogMessage << result_30 <<" "<<result_31 <<" "<<result_32 <<std::endl;
 
   // Yp
   offset = st._offsets [Yp][ss];
@@ -446,12 +443,6 @@ void DiracOptHandDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
     MULT_2SPIN(Xm);
   }
   XM_RECON_ACCUM;
-  //  std::cout<<GridLogMessage << "XM_RECON_ACCUM"<<std::endl;
-  //  std::cout<<GridLogMessage << result_00 <<" "<<result_01 <<" "<<result_02 <<std::endl;
-  //  std::cout<<GridLogMessage << result_10 <<" "<<result_11 <<" "<<result_12 <<std::endl;
-  //  std::cout<<GridLogMessage << result_20 <<" "<<result_21 <<" "<<result_22 <<std::endl;
-  //  std::cout<<GridLogMessage << result_30 <<" "<<result_31 <<" "<<result_32 <<std::endl;
-  
   
   // Ym
   offset = st._offsets [Ym][ss];
@@ -530,48 +521,49 @@ void DiracOptHandDhopSite(CartesianStencil &st,LatticeDoubledGaugeField &U,
   }
 }
 
-void DiracOptHandDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
-			       std::vector<vHalfSpinColourVector,alignedAllocator<vHalfSpinColourVector> >  &buf,
-			       int ss,int sU,const LatticeFermion &in, LatticeFermion &out)
+template<class Simd>
+void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
+							      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
+							      int ss,int sU,const FermionField &in, FermionField &out)
 {
-  REGISTER vComplex result_00; // 12 regs on knc
-  REGISTER vComplex result_01;
-  REGISTER vComplex result_02;
+  REGISTER Simd result_00; // 12 regs on knc
+  REGISTER Simd result_01;
+  REGISTER Simd result_02;
 
-  REGISTER vComplex result_10;
-  REGISTER vComplex result_11;
-  REGISTER vComplex result_12;
+  REGISTER Simd result_10;
+  REGISTER Simd result_11;
+  REGISTER Simd result_12;
 
-  REGISTER vComplex result_20;
-  REGISTER vComplex result_21;
-  REGISTER vComplex result_22;
+  REGISTER Simd result_20;
+  REGISTER Simd result_21;
+  REGISTER Simd result_22;
 
-  REGISTER vComplex result_30;
-  REGISTER vComplex result_31;
-  REGISTER vComplex result_32; // 20 left
+  REGISTER Simd result_30;
+  REGISTER Simd result_31;
+  REGISTER Simd result_32; // 20 left
 
-  REGISTER vComplex Chi_00;    // two spinor; 6 regs
-  REGISTER vComplex Chi_01;
-  REGISTER vComplex Chi_02;
+  REGISTER Simd Chi_00;    // two spinor; 6 regs
+  REGISTER Simd Chi_01;
+  REGISTER Simd Chi_02;
 
-  REGISTER vComplex Chi_10;
-  REGISTER vComplex Chi_11;
-  REGISTER vComplex Chi_12;   // 14 left
+  REGISTER Simd Chi_10;
+  REGISTER Simd Chi_11;
+  REGISTER Simd Chi_12;   // 14 left
 
-  REGISTER vComplex UChi_00;  // two spinor; 6 regs
-  REGISTER vComplex UChi_01;
-  REGISTER vComplex UChi_02;
+  REGISTER Simd UChi_00;  // two spinor; 6 regs
+  REGISTER Simd UChi_01;
+  REGISTER Simd UChi_02;
 
-  REGISTER vComplex UChi_10;
-  REGISTER vComplex UChi_11;
-  REGISTER vComplex UChi_12;  // 8 left
+  REGISTER Simd UChi_10;
+  REGISTER Simd UChi_11;
+  REGISTER Simd UChi_12;  // 8 left
 
-  REGISTER vComplex U_00;  // two rows of U matrix
-  REGISTER vComplex U_10;
-  REGISTER vComplex U_20;  
-  REGISTER vComplex U_01;
-  REGISTER vComplex U_11;
-  REGISTER vComplex U_21;  // 2 reg left.
+  REGISTER Simd U_00;  // two rows of U matrix
+  REGISTER Simd U_10;
+  REGISTER Simd U_20;  
+  REGISTER Simd U_01;
+  REGISTER Simd U_11;
+  REGISTER Simd U_21;  // 2 reg left.
 
 #define Chimu_00 Chi_00
 #define Chimu_01 Chi_01
@@ -752,7 +744,7 @@ void DiracOptHandDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
   TP_RECON_ACCUM;
 
   {
-    vSpinColourVector & ref (out._odata[ss]);
+    SiteSpinor & ref (out._odata[ss]);
     vstream(ref()(0)(0),result_00*(-0.5));
     vstream(ref()(0)(1),result_01*(-0.5));
     vstream(ref()(0)(2),result_02*(-0.5));
@@ -767,4 +759,5 @@ void DiracOptHandDhopSiteDag(CartesianStencil &st,LatticeDoubledGaugeField &U,
     vstream(ref()(3)(2),result_32*(-0.5));
   }
 }
+#endif
 }}
diff --git a/lib/qcd/action/pseudofermion/TwoFlavour.h b/lib/qcd/action/pseudofermion/TwoFlavour.h
index 22f761fe..635c1ca6 100644
--- a/lib/qcd/action/pseudofermion/TwoFlavour.h
+++ b/lib/qcd/action/pseudofermion/TwoFlavour.h
@@ -95,12 +95,13 @@ namespace Grid{
     ////////////////////////////////////////////////////////////////////////
     // Two flavour pseudofermion action for any dop
     ////////////////////////////////////////////////////////////////////////
-    template<class GaugeField,class MatrixField,class FermionField>
-      class TwoFlavourPseudoFermionAction : public Action<GaugeField> {
+    template<class Impl>
+    class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
 
     private:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
       
-      FermionOperator<FermionField,GaugeField> & FermOp;// the basic operator
+      FermionOperator<Impl> & FermOp;// the basic operator
 
       OperatorFunction<FermionField> &DerivativeSolver;
 
@@ -112,7 +113,7 @@ namespace Grid{
       /////////////////////////////////////////////////
       // Pass in required objects.
       /////////////////////////////////////////////////
-    TwoFlavourPseudoFermionAction(FermionOperator<FermionField,GaugeField>  &Op, 
+    TwoFlavourPseudoFermionAction(FermionOperator<Impl>  &Op, 
 				  OperatorFunction<FermionField> & DS,
 				  OperatorFunction<FermionField> & AS
 				  ) : FermOp(Op), DerivativeSolver(DS), ActionSolver(AS), Phi(Op.FermionGrid()) {
@@ -158,7 +159,7 @@ namespace Grid{
 	FermionField X(FermOp.FermionGrid());
 	FermionField Y(FermOp.FermionGrid());
 	
-	MdagMLinearOperator<FermionOperator<FermionField,GaugeField> ,FermionField> MdagMOp(FermOp);
+	MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(FermOp);
 	X=zero;
 	ActionSolver(MdagMOp,Phi,X);
 	MdagMOp.Op(X,Y);
@@ -183,7 +184,7 @@ namespace Grid{
 	FermionField Y(FermOp.FermionGrid());
 	GaugeField   tmp(FermOp.GaugeGrid());
 
-	MdagMLinearOperator<FermionOperator<FermionField,GaugeField> ,FermionField> MdagMOp(FermOp);
+	MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(FermOp);
 
 	X=zero;
 	DerivativeSolver(MdagMOp,Phi,X);
diff --git a/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h b/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h
index 63dec41a..35a208c8 100644
--- a/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h
+++ b/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h
@@ -4,13 +4,18 @@
 namespace Grid{
   namespace QCD{
 
-    template<class Matrix,class FermionField>
-      class SchurDifferentiableOperator :  public SchurDiagMooeeOperator<Matrix,FermionField> 
+    template<class Impl>
+    class SchurDifferentiableOperator :  public SchurDiagMooeeOperator<FermionOperator<Impl>,typename Impl::FermionField> 
       {
+      public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
+	
     public:
+	typedef FermionOperator<Impl> Matrix;
+
 	SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator<Matrix,FermionField>(Mat) {};
 
-	void MpcDeriv(LatticeGaugeField &Force,const FermionField &U,const FermionField &V) {
+	void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
 	
 	  GridBase *fgrid   = this->_Mat.FermionGrid();
 	  GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
@@ -28,8 +33,8 @@ namespace Grid{
 	  assert(U.checkerboard==Odd);
 	  assert(V.checkerboard==V.checkerboard);
 
-	  LatticeGaugeField ForceO(ucbgrid);
-	  LatticeGaugeField ForceE(ucbgrid);
+	  GaugeField ForceO(ucbgrid);
+	  GaugeField ForceE(ucbgrid);
 
 	  //  X^dag Der_oe MeeInv Meo Y
 	  // Use Mooee as nontrivial but gauge field indept
@@ -48,7 +53,7 @@ namespace Grid{
 	}
 
 
-	void MpcDagDeriv(LatticeGaugeField &Force,const FermionField &U,const FermionField &V) {
+	void MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
 	
 	  GridBase *fgrid   = this->_Mat.FermionGrid();
 	  GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
@@ -66,8 +71,8 @@ namespace Grid{
 	  assert(V.checkerboard==Odd);
 	  assert(V.checkerboard==V.checkerboard);
 
-	  LatticeGaugeField ForceO(ucbgrid);
-	  LatticeGaugeField ForceE(ucbgrid);
+	  GaugeField ForceO(ucbgrid);
+	  GaugeField ForceE(ucbgrid);
 
 	  //  X^dag Der_oe MeeInv Meo Y
 	  // Use Mooee as nontrivial but gauge field indept
@@ -91,12 +96,15 @@ namespace Grid{
     ////////////////////////////////////////////////////////////////////////
     // Two flavour pseudofermion action for any EO prec dop
     ////////////////////////////////////////////////////////////////////////
-    template<class GaugeField,class MatrixField,class FermionField>
-      class TwoFlavourEvenOddPseudoFermionAction : public Action<GaugeField> {
+    template<class Impl>
+    class TwoFlavourEvenOddPseudoFermionAction : public Action<typename Impl::GaugeField> {
+
+    public:
+#include <qcd/action/fermion/FermionImplTypedefs.h>
 
     private:
       
-      FermionOperator<FermionField,GaugeField> & FermOp;// the basic operator
+      FermionOperator<Impl> & FermOp;// the basic operator
 
       OperatorFunction<FermionField> &DerivativeSolver;
 
@@ -109,7 +117,7 @@ namespace Grid{
       /////////////////////////////////////////////////
       // Pass in required objects.
       /////////////////////////////////////////////////
-      TwoFlavourEvenOddPseudoFermionAction(FermionOperator<FermionField,GaugeField>  &Op, 
+      TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl>  &Op, 
 					 OperatorFunction<FermionField> & DS,
 					 OperatorFunction<FermionField> & AS
 					   ) : 
@@ -140,7 +148,7 @@ namespace Grid{
 	pickCheckerboard(Even,etaEven,eta);
 	pickCheckerboard(Odd,etaOdd,eta);
 
-	SchurDifferentiableOperator<FermionOperator<FermionField,GaugeField>,FermionField> PCop(FermOp);
+	SchurDifferentiableOperator<Impl> PCop(FermOp);
 	
 	FermOp.ImportGauge(U);
 
@@ -163,7 +171,7 @@ namespace Grid{
 	FermionField X(FermOp.FermionRedBlackGrid());
 	FermionField Y(FermOp.FermionRedBlackGrid());
 	
-	SchurDifferentiableOperator<FermionOperator<FermionField,GaugeField>,FermionField> PCop(FermOp);
+	SchurDifferentiableOperator<Impl> PCop(FermOp);
 
 	X=zero;
 	ActionSolver(PCop,PhiOdd,X);
@@ -195,7 +203,7 @@ namespace Grid{
 	FermionField Y(FermOp.FermionRedBlackGrid());
 	GaugeField tmp(FermOp.GaugeGrid());
 
-	SchurDifferentiableOperator<FermionOperator<FermionField,GaugeField>,FermionField> PCop(FermOp);
+	SchurDifferentiableOperator<Impl> PCop(FermOp);
 
 	X=zero;
 	DerivativeSolver(PCop,PhiOdd,X);
diff --git a/lib/qcd/spin/Dirac.h b/lib/qcd/spin/Dirac.h
index 3208d0b5..65d8d6b5 100644
--- a/lib/qcd/spin/Dirac.h
+++ b/lib/qcd/spin/Dirac.h
@@ -4,7 +4,6 @@ namespace Grid{
 
 namespace QCD {
 
-  const int SpinorIndex = 2;
 
   class Gamma {
 
diff --git a/lib/qcd/spin/TwoSpinor.h b/lib/qcd/spin/TwoSpinor.h
index 919f0f76..94ba9978 100644
--- a/lib/qcd/spin/TwoSpinor.h
+++ b/lib/qcd/spin/TwoSpinor.h
@@ -42,18 +42,14 @@ namespace QCD {
    * -i 0  0  0
    */
 
-
-  template<class vtype> strong_inline void 
-    spProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  // To fail is not to err (Cryptic clue: suggest to Google SFINAE ;) )
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      // To fail is not to err (Cryptic clue: suggest to Google SFINAE ;) )
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)+timesI(fspin(3));
       hspin(1)=fspin(1)+timesI(fspin(2));
     }
-  template<class vtype> strong_inline void spProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)-timesI(fspin(3));
       hspin(1)=fspin(1)-timesI(fspin(2));
     }
@@ -62,15 +58,13 @@ namespace QCD {
       //  0 0  1  0   [1] +- [2]
       //  0 1  0  0
       // -1 0  0  0
-  template<class vtype> strong_inline void spProjYp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjYp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)-fspin(3);
       hspin(1)=fspin(1)+fspin(2);
     }
-  template<class vtype> strong_inline void spProjYm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjYm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)+fspin(3);
       hspin(1)=fspin(1)-fspin(2);
     }
@@ -80,15 +74,14 @@ namespace QCD {
 	     * -i 0  0  0
 	     *  0 i  0  0
 	     */
-  template<class vtype> strong_inline void spProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)+timesI(fspin(2));
       hspin(1)=fspin(1)-timesI(fspin(3));
     }
-  template<class vtype> strong_inline void spProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)-timesI(fspin(2));
       hspin(1)=fspin(1)+timesI(fspin(3));
     }
@@ -98,15 +91,15 @@ namespace QCD {
 	     *  1 0  0  0
 	     *  0 1  0  0
 	     */
-  template<class vtype> strong_inline void spProjTp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjTp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)+fspin(2);
       hspin(1)=fspin(1)+fspin(3);
     }
-  template<class vtype> strong_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0)-fspin(2);
       hspin(1)=fspin(1)-fspin(3);
     }
@@ -117,32 +110,33 @@ namespace QCD {
 	     *  0 0  0 -1
 	     */
 
-  template<class vtype> strong_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(0);
       hspin(1)=fspin(1);
     }
-  template<class vtype> strong_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
+
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       hspin(0)=fspin(2);
       hspin(1)=fspin(3);
     }
   
   //  template<class vtype> strong_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
-  template<class vtype> strong_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       rfspin(0)=fspin(0);
       rfspin(1)=fspin(1);
       rfspin(2)=zero;
       rfspin(3)=zero;
     }
   //  template<class vtype> strong_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
-  template<class vtype> strong_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       rfspin(0)=zero;
       rfspin(1)=zero;
       rfspin(2)=fspin(2);
@@ -158,33 +152,33 @@ namespace QCD {
    *  0 -i 0  0  -i[1]+-[2]   == -i ([0]+-i[3]) = -i (1)
    * -i 0  0  0  -i[0]+-[3]   == -i ([1]+-i[2]) = -i (0)
    */
-  template<class vtype> strong_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=timesMinusI(hspin(1));
       fspin(3)=timesMinusI(hspin(0));
     }
-  template<class vtype> strong_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=timesI(hspin(1));
       fspin(3)=timesI(hspin(0));
     }
-  template<class vtype> strong_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)-=timesI(hspin(1));
       fspin(3)-=timesI(hspin(0));
     }
-  template<class vtype> strong_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)+=timesI(hspin(1));
@@ -196,33 +190,33 @@ namespace QCD {
       //  0 1  0  0              == 1(1)
       // -1 0  0  0              ==-1(0)
 
-  template<class vtype> strong_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)= hspin(1);
       fspin(3)=-hspin(0);//Unary minus?
     }
-  template<class vtype> strong_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=-hspin(1);
       fspin(3)= hspin(0);
     }
-  template<class vtype> strong_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)+=hspin(1);
       fspin(3)-=hspin(0);
     }
-  template<class vtype> strong_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)-=hspin(1);
@@ -235,33 +229,33 @@ namespace QCD {
 	     * -i 0  0  0     => -i (0)
 	     *  0 i  0  0     =>  i (1)
 	     */
-  template<class vtype> strong_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=timesMinusI(hspin(0));
       fspin(3)=timesI(hspin(1));
     }
-  template<class vtype> strong_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=     timesI(hspin(0));
       fspin(3)=timesMinusI(hspin(1));
     }
-  template<class vtype> strong_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)-=timesI(hspin(0));
       fspin(3)+=timesI(hspin(1));
     }
-  template<class vtype> strong_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)+=timesI(hspin(0));
@@ -273,33 +267,33 @@ namespace QCD {
 	     *  1 0  0  0    => (0)
 	     *  0 1  0  0    => (1)
 	     */
-  template<class vtype> strong_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=hspin(0);
       fspin(3)=hspin(1);
     }
-  template<class vtype> strong_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0);
       fspin(1)=hspin(1);
       fspin(2)=-hspin(0);
       fspin(3)=-hspin(1);
     }
-  template<class vtype> strong_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)+=hspin(0);
       fspin(3)+=hspin(1);
     }
-  template<class vtype> strong_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0);
       fspin(1)+=hspin(1);
       fspin(2)-=hspin(0);
@@ -311,31 +305,31 @@ namespace QCD {
 	     *  0 0 -1  0
 	     *  0 0  0 -1
 	     */
-  template<class vtype> strong_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
       fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
       fspin(2)=zero;
       fspin(3)=zero;
     }
-  template<class vtype> strong_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)=zero;
       fspin(1)=zero;
       fspin(2)=hspin(0)+hspin(0);
       fspin(3)=hspin(1)+hspin(1);
     }
-  template<class vtype> strong_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(0)+=hspin(0)+hspin(0);
       fspin(1)+=hspin(1)+hspin(1);
     }
-  template<class vtype> strong_inline void accumRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
+  template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> strong_inline void accumRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
     {
-      typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
+      //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
       fspin(2)+=hspin(0)+hspin(0);
       fspin(3)+=hspin(1)+hspin(1);
     }
@@ -347,21 +341,19 @@ namespace QCD {
   //////////
   // Xp
   //////////
-  template<class rtype,class vtype> strong_inline void spProjXp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
-    spProjXp(hspin._internal,fspin._internal);
-  }
-  template<class rtype,class vtype,int N> strong_inline void spProjXp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
-  {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjXp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjXp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype> strong_inline void spProjXp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+  {
+    spProjXp(hspin._internal,fspin._internal);
+  }
+  template<class rtype,class vtype,int N> strong_inline void spProjXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjXp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -371,19 +363,16 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconXp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconXp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconXp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconXp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconXp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconXp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -392,19 +381,16 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumReconXp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconXp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconXp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconXp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconXp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconXp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconXp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -418,19 +404,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjXm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjXm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjXm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjXm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -440,19 +426,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconXm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconXm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconXm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconXm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -461,19 +447,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconXm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconXm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconXm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconXm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -487,19 +473,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjYp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjYp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjYp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjYp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -509,19 +495,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconYp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconYp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconYp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconYp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -530,19 +516,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconYp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconYp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconYp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconYp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -555,19 +541,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjYm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjYm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjYm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjYm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -577,19 +563,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconYm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconYm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconYm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconYm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -598,19 +584,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconYm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconYm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconYm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconYm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -623,19 +609,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjZp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjZp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjZp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjZp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -645,19 +631,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconZp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconZp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconZp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconZp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -666,19 +652,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconZp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconZp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconZp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconZp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -691,19 +677,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjZm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjZm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjZm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjZm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -713,19 +699,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconZm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconZm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconZm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconZm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -734,19 +720,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconZm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconZm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconZm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconZm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -759,19 +745,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjTp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjTp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjTp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjTp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -781,40 +767,40 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconTp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconTp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconTp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconTp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
     }}
   }
 
-  template<class rtype,class vtype> strong_inline void accumReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+  template<class rtype,class vtype> strong_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconTp(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconTp (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconTp(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconTp (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
@@ -827,19 +813,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProjTm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjTm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProjTm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProjTm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -847,42 +833,42 @@ namespace QCD {
   }
 
 
-  template<class rtype,class vtype> strong_inline void spReconTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+  template<class rtype,class vtype> strong_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spReconTm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconTm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spReconTm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spReconTm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
     }}
   }
 
-  template<class rtype,class vtype> strong_inline void accumReconTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
+  template<class rtype,class vtype> strong_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumReconTm(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconTm (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumReconTm(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumReconTm (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
@@ -895,19 +881,19 @@ namespace QCD {
   ////////
   template<class rtype,class vtype> strong_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProj5p(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProj5p (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProj5p(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProj5p (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -916,19 +902,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spRecon5p(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spRecon5p (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spRecon5p(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spRecon5p (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -937,19 +923,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumRecon5p(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumRecon5p (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumRecon5p(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumRecon5p (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -960,21 +946,21 @@ namespace QCD {
   //  template<class vtype> strong_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
   template<class vtype> strong_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProj5p(hspin._internal,fspin._internal);
   }
   //  template<class vtype,int N> strong_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
-  template<class vtype,int N> strong_inline void spProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProj5p(hspin._internal[i],fspin._internal[i]);
     }
   }
   //  template<class vtype,int N> strong_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
-  template<class vtype,int N> strong_inline void spProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class vtype,int N> strong_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
@@ -985,21 +971,19 @@ namespace QCD {
   ////////
   // 5m
   ////////
+
   template<class rtype,class vtype> strong_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProj5m(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spProj5m (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<rtype> = 0> strong_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProj5m(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spProj5m (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
@@ -1008,19 +992,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spRecon5m(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void spRecon5m (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spRecon5m(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void spRecon5m (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
@@ -1029,19 +1013,19 @@ namespace QCD {
 
   template<class rtype,class vtype> strong_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     accumRecon5m(hspin._internal,fspin._internal);
   }
-  template<class rtype,class vtype,int N> strong_inline void accumRecon5m (iVector<rtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class rtype,class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       accumRecon5m(hspin._internal[i],fspin._internal[i]);
     }
   }
-  template<class rtype,class vtype,int N> strong_inline void accumRecon5m (iMatrix<rtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class rtype,class vtype,int N> strong_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
@@ -1053,21 +1037,21 @@ namespace QCD {
   //  template<class vtype> strong_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
   template<class vtype> strong_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
     spProj5m(hspin._internal,fspin._internal);
   }
   //  template<class vtype,int N> strong_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
-  template<class vtype,int N> strong_inline void spProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
+  template<class vtype,int N,IfNotSpinor<vtype> = 0> strong_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
     for(int i=0;i<N;i++) {
       spProj5m(hspin._internal[i],fspin._internal[i]);
     }
   }
   //  template<class vtype,int N> strong_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
-  template<class vtype,int N> strong_inline void spProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
+  template<class vtype,int N> strong_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
   {
-    typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
+    //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
     for(int i=0;i<N;i++){ 
     for(int j=0;j<N;j++){
       spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h
index 703a2ff5..a7d51b0e 100644
--- a/lib/tensors/Tensor_traits.h
+++ b/lib/tensors/Tensor_traits.h
@@ -202,7 +202,6 @@ namespace Grid {
     static const bool value = true;
   };
 
-
 }
 
 #endif
diff --git a/scripts/cleanup b/scripts/cleanup
index 956a6884..ff6dcf39 100755
--- a/scripts/cleanup
+++ b/scripts/cleanup
@@ -1,3 +1,4 @@
 find . -name "*~" -print -exec rm -f {} \;
 find . -name "errs" -print -exec rm -f {} \;
 find . -name "log" -print -exec rm -f {} \;
+find . -name "*.bak" -print -exec rm -f {} \;
diff --git a/tests/Test_cayley_cg.cc b/tests/Test_cayley_cg.cc
index f8d1187c..0f4955fd 100644
--- a/tests/Test_cayley_cg.cc
+++ b/tests/Test_cayley_cg.cc
@@ -72,34 +72,34 @@ int main (int argc, char ** argv)
   RealD mass=0.1;
   RealD M5  =1.8;
   std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  TestCGinversions<DomainWallFermion>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   RealD b=1.5;// Scale factor b+c=2, b-c=1
   RealD c=0.5;
   std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
-  MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
-  TestCGinversions<MobiusFermion>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
+  TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
-  MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
-  TestCGinversions<MobiusZolotarevFermion>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
+  TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
-  ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
-  TestCGinversions<ScaledShamirFermion>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
+  TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
-  ShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
-  TestCGinversions<ShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
-  OverlapWilsonCayleyTanhFermion Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-  TestCGinversions<OverlapWilsonCayleyTanhFermion>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
-  OverlapWilsonCayleyZolotarevFermion Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
-  TestCGinversions<OverlapWilsonCayleyZolotarevFermion>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }
diff --git a/tests/Test_cayley_coarsen_support.cc b/tests/Test_cayley_coarsen_support.cc
index 3ddf6205..77e77307 100644
--- a/tests/Test_cayley_coarsen_support.cc
+++ b/tests/Test_cayley_coarsen_support.cc
@@ -67,8 +67,8 @@ int main (int argc, char ** argv)
   RealD mass=0.5;
   RealD M5=1.8;
 
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> HermIndefOp(Ddwf);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
 
   HermIndefOp.Op(src,ref);
   HermIndefOp.OpDiag(src,result);
@@ -89,7 +89,7 @@ int main (int argc, char ** argv)
 
   std::cout<<GridLogMessage<<"Calling Aggregation class" <<std::endl;
 
-  MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermDefOp(Ddwf);
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
   typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
   Subspace Aggregates(Coarse5d,FGrid);
   Aggregates.CreateSubspaceRandom(RNG5);
diff --git a/tests/Test_cayley_even_odd.cc b/tests/Test_cayley_even_odd.cc
index d0c7b74a..c36567cd 100644
--- a/tests/Test_cayley_even_odd.cc
+++ b/tests/Test_cayley_even_odd.cc
@@ -49,35 +49,35 @@ int main (int argc, char ** argv)
   RealD mass=0.1;
   RealD M5  =1.8;
   std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  TestWhat<DomainWallFermion>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  TestWhat<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   RealD b=1.5;// Scale factor b+c=2, b-c=1
   RealD c=0.5;
   std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
-  MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
-  TestWhat<MobiusFermion>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
+  TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
-  MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
-  TestWhat<MobiusZolotarevFermion>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
+  TestWhat<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
-  ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
-  TestWhat<ScaledShamirFermion>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
+  TestWhat<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
 
   std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
-  ShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
-  TestWhat<ShamirZolotarevFermion>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestWhat<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
-  OverlapWilsonCayleyTanhFermion Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-  TestWhat<OverlapWilsonCayleyTanhFermion>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestWhat<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
-  OverlapWilsonCayleyZolotarevFermion Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
-  TestWhat<OverlapWilsonCayleyZolotarevFermion>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
+  TestWhat<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }
diff --git a/tests/Test_cayley_ldop_cr.cc b/tests/Test_cayley_ldop_cr.cc
index 48d7c9dc..2414d6f1 100644
--- a/tests/Test_cayley_ldop_cr.cc
+++ b/tests/Test_cayley_ldop_cr.cc
@@ -55,8 +55,8 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> HermIndefOp(Ddwf);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
 
   const int nbasis = 8;
 
@@ -67,7 +67,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermDefOp(Ddwf);
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
   Subspace Aggregates(Coarse5d,FGrid);
   Aggregates.CreateSubspace(RNG5,HermDefOp);
 
diff --git a/tests/Test_cf_coarsen_support.cc b/tests/Test_cf_coarsen_support.cc
index caa862be..d5cb7dfc 100644
--- a/tests/Test_cf_coarsen_support.cc
+++ b/tests/Test_cf_coarsen_support.cc
@@ -48,8 +48,8 @@ int main (int argc, char ** argv)
   RealD M5=1.8;
 
   {
-    OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-    HermitianLinearOperator<OverlapWilsonContFracTanhFermion,LatticeFermion> HermIndefOp(Dcf);
+    OverlapWilsonContFracTanhFermionR Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+    HermitianLinearOperator<OverlapWilsonContFracTanhFermionR,LatticeFermion> HermIndefOp(Dcf);
 
     HermIndefOp.Op(src,ref);
     HermIndefOp.OpDiag(src,result);
@@ -65,8 +65,8 @@ int main (int argc, char ** argv)
   }
 
   {
-    OverlapWilsonPartialFractionTanhFermion Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-    HermitianLinearOperator<OverlapWilsonPartialFractionTanhFermion,LatticeFermion> HermIndefOp(Dpf);
+    OverlapWilsonPartialFractionTanhFermionR Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+    HermitianLinearOperator<OverlapWilsonPartialFractionTanhFermionR,LatticeFermion> HermIndefOp(Dpf);
     
     HermIndefOp.Op(src,ref);
     HermIndefOp.OpDiag(src,result);
diff --git a/tests/Test_cf_cr_unprec.cc b/tests/Test_cf_cr_unprec.cc
index 90acb503..7cb238ea 100644
--- a/tests/Test_cf_cr_unprec.cc
+++ b/tests/Test_cf_cr_unprec.cc
@@ -44,14 +44,14 @@ int main (int argc, char ** argv)
   RealD mass=0.1;
   RealD M5=1.8;
 
-  OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  OverlapWilsonContFracTanhFermionR Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
 
   ConjugateResidual<LatticeFermion> MCR(1.0e-8,10000);
 
-  MdagMLinearOperator<OverlapWilsonContFracTanhFermion,LatticeFermion> HermPosDefOp(Dcf);
+  MdagMLinearOperator<OverlapWilsonContFracTanhFermionR,LatticeFermion> HermPosDefOp(Dcf);
   MCR(HermPosDefOp,src,result);
 
-  HermitianLinearOperator<OverlapWilsonContFracTanhFermion,LatticeFermion> HermIndefOp(Dcf);
+  HermitianLinearOperator<OverlapWilsonContFracTanhFermionR,LatticeFermion> HermIndefOp(Dcf);
   MCR(HermIndefOp,src,result);
 
   Grid_finalize();
diff --git a/tests/Test_contfrac_cg.cc b/tests/Test_contfrac_cg.cc
index da6a81b9..abf46754 100644
--- a/tests/Test_contfrac_cg.cc
+++ b/tests/Test_contfrac_cg.cc
@@ -75,21 +75,21 @@ int main (int argc, char ** argv)
 
 
   std::cout<<GridLogMessage <<"OverlapWilsonContFracTanhFermion  test"<<std::endl;
-  OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-  TestCGinversions<OverlapWilsonContFracTanhFermion>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonContFracTanhFermionR Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestCGinversions<OverlapWilsonContFracTanhFermionR>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonContFracZolotarevFermion  test"<<std::endl;
-  OverlapWilsonContFracZolotarevFermion Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
-  TestCGinversions<OverlapWilsonContFracZolotarevFermion>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonContFracZolotarevFermionR Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestCGinversions<OverlapWilsonContFracZolotarevFermionR>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
 
   std::cout<<GridLogMessage <<"OverlapWilsonPartialFractionTanhFermion  test"<<std::endl;
-  OverlapWilsonPartialFractionTanhFermion Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-  TestCGinversions<OverlapWilsonPartialFractionTanhFermion>(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonPartialFractionTanhFermionR Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestCGinversions<OverlapWilsonPartialFractionTanhFermionR>(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonPartialFractionZolotarevFermion  test"<<std::endl;
-  OverlapWilsonPartialFractionZolotarevFermion Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
-  TestCGinversions<OverlapWilsonPartialFractionZolotarevFermion>(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonPartialFractionZolotarevFermionR Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestCGinversions<OverlapWilsonPartialFractionZolotarevFermionR>(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
 
   Grid_finalize();
diff --git a/tests/Test_contfrac_even_odd.cc b/tests/Test_contfrac_even_odd.cc
index c8e5b093..7fcfc0f4 100644
--- a/tests/Test_contfrac_even_odd.cc
+++ b/tests/Test_contfrac_even_odd.cc
@@ -50,20 +50,20 @@ int main (int argc, char ** argv)
   RealD M5  =1.8;
 
   std::cout<<GridLogMessage <<"OverlapWilsonContFracTanhFermion  test"<<std::endl;
-  OverlapWilsonContFracTanhFermion Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-  TestWhat<OverlapWilsonContFracTanhFermion>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonContFracTanhFermionR Dcf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestWhat<OverlapWilsonContFracTanhFermionR>(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonContFracZolotarevFermion  test"<<std::endl;
-  OverlapWilsonContFracZolotarevFermion Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
-  TestWhat<OverlapWilsonContFracZolotarevFermion>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonContFracZolotarevFermionR Dcfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestWhat<OverlapWilsonContFracZolotarevFermionR>(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonPartialFractionTanhFermion  test"<<std::endl;
-  OverlapWilsonPartialFractionTanhFermion Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
-  TestWhat<OverlapWilsonPartialFractionTanhFermion>(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonPartialFractionTanhFermionR Dpf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  TestWhat<OverlapWilsonPartialFractionTanhFermionR>(Dpf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   std::cout<<GridLogMessage <<"OverlapWilsonPartialFractionZolotarevFermion  test"<<std::endl;
-  OverlapWilsonPartialFractionZolotarevFermion Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
-  TestWhat<OverlapWilsonPartialFractionZolotarevFermion>(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
+  OverlapWilsonPartialFractionZolotarevFermionR Dpfz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,6.0);
+  TestWhat<OverlapWilsonPartialFractionZolotarevFermionR>(Dpfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 
   Grid_finalize();
 }
diff --git a/tests/Test_contfrac_force.cc b/tests/Test_contfrac_force.cc
index a39d596d..58b42693 100644
--- a/tests/Test_contfrac_force.cc
+++ b/tests/Test_contfrac_force.cc
@@ -42,7 +42,7 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   RealD mass=0.01; 
   RealD M5=1.8; 
-  OverlapWilsonContFracTanhFermion Dcf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  OverlapWilsonContFracTanhFermionR Dcf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
   Dcf.M   (phi,Mphi);
 
   ComplexD S    = innerProduct(Mphi,Mphi); // pdag MdagM p
diff --git a/tests/Test_dwf_cg_prec.cc b/tests/Test_dwf_cg_prec.cc
index 633d280f..a39b6529 100644
--- a/tests/Test_dwf_cg_prec.cc
+++ b/tests/Test_dwf_cg_prec.cc
@@ -45,14 +45,14 @@ int main (int argc, char ** argv)
   
   RealD mass=0.1;
   RealD M5=1.8;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
   LatticeFermion    src_o(FrbGrid);
   LatticeFermion result_o(FrbGrid);
   pickCheckerboard(Odd,src_o,src);
   result_o=zero;
 
-  SchurDiagMooeeOperator<DomainWallFermion,LatticeFermion> HermOpEO(Ddwf);
+  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOpEO,src_o,result_o);
 
diff --git a/tests/Test_dwf_cg_schur.cc b/tests/Test_dwf_cg_schur.cc
index ac3989d9..e9db375e 100644
--- a/tests/Test_dwf_cg_schur.cc
+++ b/tests/Test_dwf_cg_schur.cc
@@ -43,7 +43,7 @@ int main (int argc, char ** argv)
   
   RealD mass=0.1;
   RealD M5=1.8;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
diff --git a/tests/Test_dwf_cg_unprec.cc b/tests/Test_dwf_cg_unprec.cc
index 055e1bef..55d0c8d0 100644
--- a/tests/Test_dwf_cg_unprec.cc
+++ b/tests/Test_dwf_cg_unprec.cc
@@ -43,9 +43,9 @@ int main (int argc, char ** argv)
   
   RealD mass=0.1;
   RealD M5=1.8;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
-  MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermOp(Ddwf);
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOp,src,result);
 
diff --git a/tests/Test_dwf_cr_unprec.cc b/tests/Test_dwf_cr_unprec.cc
index c2b75853..2d791ce2 100644
--- a/tests/Test_dwf_cr_unprec.cc
+++ b/tests/Test_dwf_cr_unprec.cc
@@ -50,12 +50,12 @@ int main (int argc, char ** argv)
   
   RealD mass=0.5;
   RealD M5=1.8;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
-  MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermOp(Ddwf);
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
   MCR(HermOp,src,result);
 
-  Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> g5HermOp(Ddwf);
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> g5HermOp(Ddwf);
   MCR(g5HermOp,src,result);
 
 
diff --git a/tests/Test_dwf_even_odd.cc b/tests/Test_dwf_even_odd.cc
index 4e3d9799..96853b4c 100644
--- a/tests/Test_dwf_even_odd.cc
+++ b/tests/Test_dwf_even_odd.cc
@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
   RealD M5  =1.8;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
   LatticeFermion src_e (FrbGrid);
   LatticeFermion src_o (FrbGrid);
@@ -187,7 +187,7 @@ int main (int argc, char ** argv)
   RealD t1,t2;
 
 
-  SchurDiagMooeeOperator<DomainWallFermion,LatticeFermion> HermOpEO(Ddwf);
+  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
   HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
   HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
 
diff --git a/tests/Test_dwf_force.cc b/tests/Test_dwf_force.cc
index a3dbb0f2..ac6fd26a 100644
--- a/tests/Test_dwf_force.cc
+++ b/tests/Test_dwf_force.cc
@@ -42,7 +42,7 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   RealD mass=0.01; 
   RealD M5=1.8; 
-  DomainWallFermion Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
   Ddwf.M   (phi,Mphi);
 
   ComplexD S    = innerProduct(Mphi,Mphi); // pdag MdagM p
diff --git a/tests/Test_dwf_fpgcr.cc b/tests/Test_dwf_fpgcr.cc
index 6c589c6c..6c9645b9 100644
--- a/tests/Test_dwf_fpgcr.cc
+++ b/tests/Test_dwf_fpgcr.cc
@@ -52,19 +52,19 @@ int main (int argc, char ** argv)
   
   RealD mass=0.5;
   RealD M5=1.8;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
   std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
   std::cout<<GridLogMessage<<"* Solving with MdagM VPGCR "<<std::endl;
   std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
-  MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermOp(Ddwf);
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermOp(Ddwf);
   result=zero;
   PGCR(HermOp,src,result);
 
   std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
   std::cout<<GridLogMessage<<"* Solving with g5-VPGCR "<<std::endl;
   std::cout<<GridLogMessage<<"*********************************************************"<<std::endl;
-  Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> g5HermOp(Ddwf);
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> g5HermOp(Ddwf);
   result=zero;
   PGCR(g5HermOp,src,result);
 
diff --git a/tests/Test_dwf_hdcr.cc b/tests/Test_dwf_hdcr.cc
index 6ff26425..45647936 100644
--- a/tests/Test_dwf_hdcr.cc
+++ b/tests/Test_dwf_hdcr.cc
@@ -401,7 +401,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 
   const int nbasis = 32;
   //  const int nbasis = 4;
@@ -413,7 +413,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  MdagMLinearOperator<DomainWallFermion,LatticeFermion> HermDefOp(Ddwf);
+  MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
   Subspace Aggregates(Coarse5d,FGrid);
   //  Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
   assert ( (nbasis & 0x1)==0);
@@ -437,7 +437,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  Gamma5R5HermitianLinearOperator<DomainWallFermion,LatticeFermion> HermIndefOp(Ddwf);
+  Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
   CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse5d);
   LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
 
@@ -467,7 +467,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 
-  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermion> Precon(Aggregates, LDOp,HermIndefOp,Ddwf);
+  MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> Precon(Aggregates, LDOp,HermIndefOp,Ddwf);
   TrivialPrecon<LatticeFermion> simple;
 
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
@@ -506,7 +506,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
   std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl;
   std::cout<<GridLogMessage << "**************************************************"<< std::endl;
-  SchurDiagMooeeOperator<DomainWallFermion,LatticeFermion> HermOpEO(Ddwf);
+  SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
   ConjugateGradient<LatticeFermion> pCG(1.0e-8,10000);
 
   LatticeFermion    src_o(FrbGrid);
diff --git a/tests/Test_hmc_EOWilsonFermionGauge.cc b/tests/Test_hmc_EOWilsonFermionGauge.cc
index 1c90933e..66ec118b 100644
--- a/tests/Test_hmc_EOWilsonFermionGauge.cc
+++ b/tests/Test_hmc_EOWilsonFermionGauge.cc
@@ -25,13 +25,12 @@ int main (int argc, char ** argv)
   WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
 
   Real mass=-0.77;
-  WilsonFermion FermOp(U,Fine,RBFine,mass);
+  WilsonFermionR FermOp(U,Fine,RBFine,mass);
   
   ConjugateGradient<LatticeFermion>  CG(1.0e-8,10000);
   ConjugateGradient<LatticeFermion>  CGmd(1.0e-6,10000);
   
-  TwoFlavourEvenOddPseudoFermionAction<LatticeLorentzColourMatrix, LatticeColourMatrix,LatticeFermion> 
-    WilsonNf2(FermOp,CG,CG);
+  TwoFlavourEvenOddPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);
   
   //Collect actions
   ActionLevel Level1;
diff --git a/tests/Test_hmc_WilsonFermionGauge.cc b/tests/Test_hmc_WilsonFermionGauge.cc
index e3bbf393..ef598062 100644
--- a/tests/Test_hmc_WilsonFermionGauge.cc
+++ b/tests/Test_hmc_WilsonFermionGauge.cc
@@ -25,13 +25,12 @@ int main (int argc, char ** argv)
   WilsonGaugeAction<LatticeLorentzColourMatrix, LatticeColourMatrix> Waction(5.6);
 
   Real mass=-0.77;
-  WilsonFermion FermOp(U,Fine,RBFine,mass);
+  WilsonFermionR FermOp(U,Fine,RBFine,mass);
   
   ConjugateGradient<LatticeFermion>  CG(1.0e-8,10000);
   ConjugateGradient<LatticeFermion>  CGmd(1.0e-6,10000);
   
-  TwoFlavourPseudoFermionAction<LatticeLorentzColourMatrix, LatticeColourMatrix,LatticeFermion> 
-    WilsonNf2(FermOp,CG,CG);
+  TwoFlavourPseudoFermionAction<WilsonImplR> WilsonNf2(FermOp,CG,CG);
   
   //Collect actions
   ActionLevel Level1;
diff --git a/tests/Test_partfrac_force.cc b/tests/Test_partfrac_force.cc
index 05925e55..6e543d3c 100644
--- a/tests/Test_partfrac_force.cc
+++ b/tests/Test_partfrac_force.cc
@@ -42,7 +42,7 @@ int main (int argc, char ** argv)
   ////////////////////////////////////
   RealD mass=0.01; 
   RealD M5=1.8; 
-  OverlapWilsonPartialFractionTanhFermion Dpf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
+  OverlapWilsonPartialFractionTanhFermionR Dpf(U,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
   Dpf.M   (phi,Mphi);
 
   ComplexD S    = innerProduct(Mphi,Mphi); // pdag MdagM p
diff --git a/tests/Test_wilson_cg_prec.cc b/tests/Test_wilson_cg_prec.cc
index c59699e4..756f8b6f 100644
--- a/tests/Test_wilson_cg_prec.cc
+++ b/tests/Test_wilson_cg_prec.cc
@@ -42,7 +42,7 @@ int main (int argc, char ** argv)
   }
   
   RealD mass=0.5;
-  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   //  HermitianOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
   //  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
@@ -53,7 +53,7 @@ int main (int argc, char ** argv)
   pickCheckerboard(Odd,src_o,src);
   result_o=zero;
 
-  SchurDiagMooeeOperator<WilsonFermion,LatticeFermion> HermOpEO(Dw);
+  SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOpEO,src_o,result_o);
 
diff --git a/tests/Test_wilson_cg_schur.cc b/tests/Test_wilson_cg_schur.cc
index 1484a6a8..2174d644 100644
--- a/tests/Test_wilson_cg_schur.cc
+++ b/tests/Test_wilson_cg_schur.cc
@@ -37,7 +37,7 @@ int main (int argc, char ** argv)
   LatticeFermion resid(&Grid); 
 
   RealD mass=0.5;
-  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
diff --git a/tests/Test_wilson_cg_unprec.cc b/tests/Test_wilson_cg_unprec.cc
index 5984cac9..9269aeaa 100644
--- a/tests/Test_wilson_cg_unprec.cc
+++ b/tests/Test_wilson_cg_unprec.cc
@@ -40,9 +40,9 @@ int main (int argc, char ** argv)
   }  
   
   RealD mass=0.5;
-  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
   CG(HermOp,src,result);
 
diff --git a/tests/Test_wilson_cr_unprec.cc b/tests/Test_wilson_cr_unprec.cc
index 468f0d5a..c7e0bc33 100644
--- a/tests/Test_wilson_cr_unprec.cc
+++ b/tests/Test_wilson_cr_unprec.cc
@@ -43,9 +43,9 @@ int main (int argc, char ** argv)
   }  
 
   RealD mass=0.5;
-  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
-  MdagMLinearOperator<WilsonFermion,LatticeFermion> HermOp(Dw);
+  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
 
   ConjugateResidual<LatticeFermion> MCR(1.0e-8,10000);
 
diff --git a/tests/Test_wilson_even_odd.cc b/tests/Test_wilson_even_odd.cc
index 66ac89dc..37ba40f1 100644
--- a/tests/Test_wilson_even_odd.cc
+++ b/tests/Test_wilson_even_odd.cc
@@ -62,7 +62,7 @@ int main (int argc, char ** argv)
 
   RealD mass=0.1;
 
-  WilsonFermion Dw(Umu,Grid,RBGrid,mass);
+  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 
   LatticeFermion src_e   (&RBGrid);
   LatticeFermion src_o   (&RBGrid);
@@ -179,7 +179,7 @@ int main (int argc, char ** argv)
   pickCheckerboard(Odd ,phi_o,phi);
   RealD t1,t2;
 
-  SchurDiagMooeeOperator<WilsonFermion,LatticeFermion> HermOpEO(Dw);
+  SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
   HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2);
   HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2);
 
diff --git a/tests/Test_wilson_force.cc b/tests/Test_wilson_force.cc
index b08ccd45..1b22c2e4 100644
--- a/tests/Test_wilson_force.cc
+++ b/tests/Test_wilson_force.cc
@@ -38,7 +38,7 @@ int main (int argc, char ** argv)
   // Unmodified matrix element
   ////////////////////////////////////
   RealD mass=-4.0; //kills the diagonal term
-  WilsonFermion Dw     (U,     Grid,RBGrid,mass);
+  WilsonFermionR Dw     (U,     Grid,RBGrid,mass);
   Dw.M   (phi,Mphi);
 
   ComplexD S    = innerProduct(Mphi,Mphi); // pdag MdagM p
@@ -88,7 +88,7 @@ int main (int argc, char ** argv)
   }
 
   std::cout << GridLogMessage <<"Initial mom hamiltonian is "<< Hmom <<std::endl;
-  Dw.DoubleStore(Dw.Umu,Uprime);
+  Dw.ImportGauge(Uprime);
   Dw.M          (phi,MphiPrime);
 
   ComplexD Sprime    = innerProduct(MphiPrime   ,MphiPrime);
diff --git a/tests/Test_wilson_force_phiMdagMphi.cc b/tests/Test_wilson_force_phiMdagMphi.cc
index 33c5f6e1..88f3161f 100644
--- a/tests/Test_wilson_force_phiMdagMphi.cc
+++ b/tests/Test_wilson_force_phiMdagMphi.cc
@@ -42,7 +42,7 @@ int main (int argc, char ** argv)
   // Unmodified matrix element
   ////////////////////////////////////
   RealD mass=-4.0; //kills the diagonal term
-  WilsonFermion Dw     (U,     Grid,RBGrid,mass);
+  WilsonFermionR Dw     (U,     Grid,RBGrid,mass);
   Dw.M   (phi,Mphi);
   Dw.Mdag(phi,Mdagphi);
 
diff --git a/tests/Test_wilson_force_phiMphi.cc b/tests/Test_wilson_force_phiMphi.cc
index f1e42195..ff9e0fb3 100644
--- a/tests/Test_wilson_force_phiMphi.cc
+++ b/tests/Test_wilson_force_phiMphi.cc
@@ -39,7 +39,7 @@ int main (int argc, char ** argv)
   // Unmodified matrix element
   ////////////////////////////////////
   RealD mass=-4.0; //kills the diagonal term
-  WilsonFermion Dw     (U,     Grid,RBGrid,mass);
+  WilsonFermionR Dw     (U,     Grid,RBGrid,mass);
   Dw.M(phi,Mphi);
 
   ComplexD S = innerProduct(phi,Mphi);