
Merge branch 'develop' of https://github.com/paboyle/Grid into specflow

Author: Chulwoo Jung
Date: 2025-04-18 19:55:36 +00:00
149 changed files with 4714 additions and 3394 deletions

View File

@ -0,0 +1,239 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_cg_prec.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#endif
typedef LatticeFermionD FermionField;
int VerifyOnDevice(const FermionField &res, FermionField &ref)
{
deviceVector<int> Fails(1);
int * Fail = &Fails[0];
int FailHost=0;
typedef typename FermionField::vector_object vobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
const uint64_t NN = res.Grid()->oSites();
acceleratorPut(*Fail,FailHost);
accelerator_barrier();
// Inject an error
int injection=0;
if(getenv("GRID_ERROR_INJECT")) injection=1;
autoView(res_v,res,AcceleratorWrite);
autoView(ref_v,ref,AcceleratorRead);
if ( res.Grid()->ThisRank()== 0 )
{
if (((random()&0xF)==0)&&injection) {
uint64_t sF = random()%(NN);
int lane=0;
printf("Error injection site %ld on rank %d\n",sF,res.Grid()->ThisRank());
auto vv = acceleratorGet(res_v[sF]);
double *dd = (double *)&vv;
*dd=M_PI;
acceleratorPut(res_v[sF],vv);
}
}
accelerator_for( sF, NN, vobj::Nsimd(), {
#ifdef GRID_SIMT
{
int blane = acceleratorSIMTlane(vobj::Nsimd());
#else
for(int blane=0;blane<vobj::Nsimd();blane++){
#endif
vector_type *vtrr = (vector_type *)&res_v[sF];
vector_type *vtrf = (vector_type *)&ref_v[sF];
int words = sizeof(vobj)/sizeof(vector_type);
for(int w=0;w<words;w++){
scalar_type rrtmp = getlane(vtrr[w], blane);
scalar_type rftmp = getlane(vtrf[w], blane);
if ( rrtmp != rftmp) {
*Fail=1;
}
}
}
});
FailHost = acceleratorGet(*Fail);
return FailHost;
}
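
The comparison above walks each site's vector object word by word and checks a single SIMD lane of res against ref. A stripped-down sketch of that pattern, using plain stand-in types rather than Grid's vobj/vector_type (the struct names and sizes below are illustrative only, not Grid's):

#include <cstddef>

// Stand-ins for a SIMD word and a site object built from several such words.
constexpr int kNsimd = 4;
struct VecWord { double lane[kNsimd]; };
struct SiteObj { VecWord w[12]; };   // e.g. 4 spins x 3 colours, one word each

// Compare lane 'blane' of every word in two site objects, as the
// accelerator loop does for res_v[sF] versus ref_v[sF].
inline bool LaneMatches(const SiteObj &a, const SiteObj &b, int blane) {
  const VecWord *va = reinterpret_cast<const VecWord *>(&a);
  const VecWord *vb = reinterpret_cast<const VecWord *>(&b);
  const int words = sizeof(SiteObj) / sizeof(VecWord);
  for (int w = 0; w < words; w++) {
    if (va[w].lane[blane] != vb[w].lane[blane]) return false; // miscompare
  }
  return true;
}

int main(void) {
  SiteObj x{}, y{};                     // identical (zero) objects
  return LaneMatches(x, y, 0) ? 0 : 1;  // matches on every lane
}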
void PrintFails(const FermionField &res, FermionField &ref,uint64_t *ids)
{
typedef typename FermionField::vector_object vobj;
const int Nsimd=vobj::Nsimd();
const uint64_t NN = res.Grid()->oSites();
///////////////////////////////
// Pull back to host
///////////////////////////////
autoView(res_v,res,CpuRead);
autoView(ref_v,ref,CpuRead);
std::vector<uint64_t> ids_host(NN*Nsimd);
acceleratorCopyFromDevice(ids,&ids_host[0],NN*Nsimd*sizeof(uint64_t));
//////////////////////////////////////////////////////////////
// Redo check on host and print IDs
//////////////////////////////////////////////////////////////
for(int ss=0;ss< NN; ss++){
int sF = ss;
for(int lane=0;lane<Nsimd;lane++){
auto rr = extractLane(lane,res_v[sF]);
auto rf = extractLane(lane,ref_v[sF]);
uint64_t id = ids_host[lane+Nsimd*sF];
// std::cout << GridHostname()<<" id["<<sF<<"] lane "<<lane<<" id "<<id<<std::endl;
for(int s=0;s<4;s++){
for(int c=0;c<3;c++){
if ( rr()(s)(c)!=rf()(s)(c) ) {
int subslice=(id>>0 )&0xFF;
int slice =(id>>8 )&0xFF;
int eu =(id>>16)&0xFF;
std::cout << GridHostname()<<" miscompare site "<<sF<<" "<<rr()(s)(c)<<" "<<rf()(s)(c)<<" EU "<<eu<<" slice "<<slice<<" subslice "<<subslice<<std::endl;
}
}
}
}
};
return;
}
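
The three 8-bit fields unpacked from id identify where on the device the miscomparing lane was produced. A minimal standalone sketch of the same decode (field names taken from the printout above; the widths are assumed from the masks used, not taken from any Grid header):

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring the decode in PrintFails: low byte is the
// subslice, the next byte the slice, the third byte the execution unit (EU).
struct LaneOrigin { int subslice, slice, eu; };

inline LaneOrigin DecodeLaneOrigin(uint64_t id) {
  LaneOrigin o;
  o.subslice = (id >> 0)  & 0xFF;
  o.slice    = (id >> 8)  & 0xFF;
  o.eu       = (id >> 16) & 0xFF;
  return o;
}

int main(void) {
  LaneOrigin o = DecodeLaneOrigin(0x00030201ull);  // EU 3, slice 2, subslice 1
  std::printf("EU %d slice %d subslice %d\n", o.eu, o.slice, o.subslice);
  return 0;
}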
int main (int argc, char ** argv)
{
char hostname[HOST_NAME_MAX+1];
gethostname(hostname, HOST_NAME_MAX+1);
std::string host(hostname);
Grid_init(&argc,&argv);
const int Ls=12;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
LatticeFermionD src(FGrid); random(RNG5,src);
LatticeFermionD junk(FGrid); random(RNG5,junk);
LatticeFermionD result(FGrid); result=Zero();
LatticeFermionD ref(FGrid); ref=Zero();
SU<Nc>::HotConfiguration(RNG4,Umu);
RealD mass=0.1;
RealD M5=1.8;
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int nsecs=600;
if( GridCmdOptionExists(argv,argv+argc,"--seconds") ){
std::string arg = GridCmdOptionPayload(argv,argv+argc,"--seconds");
GridCmdOptionInt(arg,nsecs);
}
std::cout << GridLogMessage << "::::::::::::: Job startup Barrier " << std::endl;
UGrid->Barrier();
std::cout << GridLogMessage << "::::::::::::: Job startup Barrier complete" << std::endl;
std::cout << GridLogMessage << "::::::::::::: Starting DWF repro for "<<nsecs <<" seconds" << std::endl;
time_t now;
time_t start = time(NULL);
UGrid->Broadcast(0,(void *)&start,sizeof(start));
FlightRecorder::ContinueOnFail = 0;
FlightRecorder::PrintEntireLog = 0;
FlightRecorder::ChecksumComms = 0;
FlightRecorder::ChecksumCommsSend=0;
if(char *s=getenv("GRID_PRINT_ENTIRE_LOG")) FlightRecorder::PrintEntireLog = atoi(s);
if(char *s=getenv("GRID_CHECKSUM_RECV_BUF")) FlightRecorder::ChecksumComms = atoi(s);
if(char *s=getenv("GRID_CHECKSUM_SEND_BUF")) FlightRecorder::ChecksumCommsSend = atoi(s);
const uint64_t NN = FGrid->oSites()*vComplexD::Nsimd();
deviceVector<uint64_t> ids_device(NN);
uint64_t *ids = &ids_device[0];
Ddwf.DhopComms(src,ref);
Ddwf.DhopCalc(src,ref,ids);
Ddwf.DhopComms(src,result);
int iter=0;
do {
result=junk;
Ddwf.DhopCalc(src,result,ids);
if ( VerifyOnDevice(result, ref) ) {
printf("Node %s Iter %d detected fails\n",GridHostname(),iter);
PrintFails(result,ref,ids);
// std::cout << " Dslash "<<iter<<" is WRONG! "<<std::endl;
}
//else {
// printf("Node %s Iter %d detected NO fails\n",GridHostname(),iter);
// PrintFails(result,ref,ids);
// std::cout << " Dslash "<<iter<<" is OK! "<<std::endl;
//}
iter ++;
now = time(NULL); UGrid->Broadcast(0,(void *)&now,sizeof(now));
} while (now < (start + nsecs) );
Grid_finalize();
}

View File

@ -124,6 +124,8 @@ int main (int argc, char ** argv)
SchurDiagMooeeOperatorParanoid<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
SchurDiagMooeeOperatorParanoid<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
// SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf);
// SchurDiagMooeeOperator<DomainWallFermionF,LatticeFermionF> HermOpEO_f(Ddwf_f);
int nsecs=600;
if( GridCmdOptionExists(argv,argv+argc,"--seconds") ){
@ -131,6 +133,10 @@ int main (int argc, char ** argv)
GridCmdOptionInt(arg,nsecs);
}
std::cout << GridLogMessage << "::::::::::::: Job startup Barrier " << std::endl;
UGrid->Barrier();
std::cout << GridLogMessage << "::::::::::::: Job startup Barrier complete" << std::endl;
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG for "<<nsecs <<" seconds" << std::endl;
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
@ -148,7 +154,7 @@ int main (int argc, char ** argv)
FlightRecorder::ContinueOnFail = 0;
FlightRecorder::PrintEntireLog = 0;
FlightRecorder::ChecksumComms = 1;
FlightRecorder::ChecksumComms = 0;
FlightRecorder::ChecksumCommsSend=0;
if(char *s=getenv("GRID_PRINT_ENTIRE_LOG")) FlightRecorder::PrintEntireLog = atoi(s);
@ -180,7 +186,7 @@ int main (int argc, char ** argv)
iter ++;
now = time(NULL); UGrid->Broadcast(0,(void *)&now,sizeof(now));
} while (now < (start + nsecs/10) );
std::cout << GridLogMessage << "::::::::::::: Starting double precision CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
int i=0;

View File

@ -31,7 +31,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
using namespace Grid;
const int TSRC = 0; //timeslice where rho is nonzero
const int VDIM = 5; //length of each vector
const int VDIM = 8; //length of each vector
typedef typename DomainWallFermionD::ComplexField ComplexField;
typedef typename DomainWallFermionD::FermionField FermionField;
@ -55,19 +55,26 @@ int main(int argc, char *argv[])
pRNG.SeedFixedIntegers(seeds);
// MesonField lhs and rhs vectors
const int Nem=1;
std::vector<FermionField> phi(VDIM,&grid);
std::vector<FermionField> rho(VDIM,&grid);
FermionField rho_tmp(&grid);
std::vector<ComplexField> B0(Nem,&grid);
std::vector<ComplexField> B1(Nem,&grid);
std::cout << GridLogMessage << "Initialising random meson fields" << std::endl;
for (unsigned int i = 0; i < VDIM; ++i){
random(pRNG,phi[i]);
random(pRNG,rho_tmp); //ideally only nonzero on t=0
rho[i] = where((t==TSRC), rho_tmp, 0.*rho_tmp); //ideally only nonzero on t=0
}
for (unsigned int i = 0; i < Nem; ++i){
random(pRNG,B0[i]);
random(pRNG,B1[i]);
}
std::cout << GridLogMessage << "Meson fields initialised, rho non-zero only for t = " << TSRC << std::endl;
// Gamma matrices used in the contraction
std::vector<Gamma::Algebra> Gmu = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT,
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
@ -78,11 +85,15 @@ int main(int argc, char *argv[])
std::vector<std::vector<double>> momenta = {
{0.,0.,0.},
{1.,0.,0.},
{-1.,0.,0.},
{0,1.,0.},
{0,-1.,0.},
{0,0,1.},
{0,0,-1.},
{1.,1.,0.},
{1.,1.,1.},
{2.,0.,0.}
};
std::cout << GridLogMessage << "Meson fields will be created for " << Gmu.size() << " Gamma matrices and " << momenta.size() << " momenta." << std::endl;
std::cout << GridLogMessage << "Computing complex phases" << std::endl;
@ -102,28 +113,29 @@ int main(int argc, char *argv[])
std::cout << GridLogMessage << "Computing complex phases done." << std::endl;
Eigen::Tensor<ComplexD,5, Eigen::RowMajor> Mpp(momenta.size(),Gmu.size(),Nt,VDIM,VDIM);
Eigen::Tensor<ComplexD,5, Eigen::RowMajor> Mpr(momenta.size(),Gmu.size(),Nt,VDIM,VDIM);
Eigen::Tensor<ComplexD,5, Eigen::RowMajor> Mrr(momenta.size(),Gmu.size(),Nt,VDIM,VDIM);
Eigen::Tensor<ComplexD,5, Eigen::RowMajor> App(B0.size(),1,Nt,VDIM,VDIM);
// timer
double start,stop;
/////////////////////////////////////////////////////////////////////////
//execute meson field routine
/////////////////////////////////////////////////////////////////////////
A2Autils<WilsonImplR>::MesonField(Mpp,&phi[0],&phi[0],Gmu,phases,Tp);
start = usecond();
A2Autils<WilsonImplR>::MesonField(Mpp,&phi[0],&phi[0],Gmu,phases,Tp);
stop = usecond();
std::cout << GridLogMessage << "M(phi,phi) created, execution time " << stop-start << " us" << std::endl;
start = usecond();
/* Ideally, for this meson field we could pass TSRC (even better a list of timeslices)
 * to the routine so that all the components which are predictably equal to zero are not computed. */
A2Autils<WilsonImplR>::MesonField(Mpr,&phi[0],&rho[0],Gmu,phases,Tp);
stop = usecond();
std::cout << GridLogMessage << "M(phi,rho) created, execution time " << stop-start << " us" << std::endl;
start = usecond();
A2Autils<WilsonImplR>::MesonField(Mrr,&rho[0],&rho[0],Gmu,phases,Tp);
stop = usecond();
std::cout << GridLogMessage << "M(rho,rho) created, execution time " << stop-start << " us" << std::endl;
/////////////////////////////////////////////////////////////////////////
//execute aslash field routine
/////////////////////////////////////////////////////////////////////////
A2Autils<WilsonImplR>::AslashField(App,&phi[0],&phi[0],B0,B1,Tp);
start = usecond();
A2Autils<WilsonImplR>::AslashField(App,&phi[0],&phi[0],B0,B1,Tp);
stop = usecond();
std::cout << GridLogMessage << "Alash(phi,phi) created, execution time " << stop-start << " us" << std::endl;
std::string FileName = "Meson_Fields";
#ifdef HAVE_HDF5
using Default_Reader = Grid::Hdf5Reader;
@ -134,12 +146,11 @@ int main(int argc, char *argv[])
using Default_Writer = Grid::BinaryWriter;
FileName.append(".bin");
#endif
Default_Writer w(FileName);
write(w,"phi_phi",Mpp);
write(w,"phi_rho",Mpr);
write(w,"rho_rho",Mrr);
{
Default_Writer w(FileName);
write(w,"MesonField",Mpp);
write(w,"AslashField",App);
}
// epilogue
std::cout << GridLogMessage << "Grid is finalizing now" << std::endl;
Grid_finalize();

View File

@ -39,7 +39,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
@ -88,6 +88,7 @@ int main (int argc, char ** argv)
Ctilde=C;
std::cout<<" Benchmarking FFT of LatticeComplex "<<std::endl;
theFFT.FFT_dim(Ctilde,Ctilde,0,FFT::forward); std::cout << theFFT.MFlops()<<" Mflops "<<std::endl;
std::cout<<" FFT done "<<std::endl;
theFFT.FFT_dim(Ctilde,Ctilde,1,FFT::forward); std::cout << theFFT.MFlops()<<" Mflops "<<std::endl;
theFFT.FFT_dim(Ctilde,Ctilde,2,FFT::forward); std::cout << theFFT.MFlops()<<" Mflops "<<std::endl;
theFFT.FFT_dim(Ctilde,Ctilde,3,FFT::forward); std::cout << theFFT.MFlops()<<" Mflops "<<std::endl;
@ -278,6 +279,7 @@ int main (int argc, char ** argv)
result5 = result5 - Kinetic;
std::cout<<"diff "<< norm2(result5)<<std::endl;
assert(norm2(result5)<1.0e-4);
}
@ -356,6 +358,7 @@ int main (int argc, char ** argv)
diff = ref - result4;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
assert(norm2(diff)<1.0e-4);
}
@ -439,6 +442,7 @@ int main (int argc, char ** argv)
diff = ref - result4;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
assert(norm2(diff)<1.0e-4);
}

View File

@ -38,7 +38,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
@ -74,7 +74,7 @@ int main (int argc, char ** argv)
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator \n";
std::cout << "Testing OverlapWilsonPartialFractionTanhFermionD Hw kernel Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
@ -88,7 +88,7 @@ int main (int argc, char ** argv)
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
OverlapWilsonPartialFractionTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
@ -119,9 +119,10 @@ int main (int argc, char ** argv)
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
MdagMLinearOperator<OverlapWilsonPartialFractionTanhFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
std::cout << " Solved by Conjugate Gradient (CGNE)" <<std::endl;
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
@ -153,7 +154,7 @@ int main (int argc, char ** argv)
////////////////////////////////////////////////////
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing Dov(Hw) Mom space 4d propagator \n";
std::cout << "Testing OverlapWilsonCayleyTanhFermionD space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
LatticeFermionD tmp(&GRID);
@ -228,7 +229,7 @@ int main (int argc, char ** argv)
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator with q\n";
std::cout<<"Testing OverlapWilsonPartialFractionTanhFermionD Hw kernel Mom space 4d propagator with q\n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
@ -242,7 +243,9 @@ int main (int argc, char ** argv)
RealD mass=0.1;
RealD M5 =0.8;
OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,0.001,8.0);
OverlapWilsonPartialFractionTanhFermionD Dov(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5,1.0);
std::vector<RealD> qmu({1.0,0.0,0.0,0.0});
Dov.set_qmu(qmu);
// Momentum space prop
std::cout << " Solving by FFT and Feynman rules" <<std::endl;
@ -273,7 +276,7 @@ int main (int argc, char ** argv)
std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
Dov.Mdag(src5,tmp5);
src5=tmp5;
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
MdagMLinearOperator<OverlapWilsonPartialFractionTanhFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
////////////////////////////////////////////////////////////////////////

View File

@ -39,7 +39,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexF::Nsimd(),1,1,1});
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
// Coordinate simd_layout( { vComplexF::Nsimd(),1,1,1});
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;

View File

@ -54,6 +54,7 @@ static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIM
int main (int argc, char ** argv)
{
#ifdef ENABLE_GPARITY
int nu = 0;
int tbc_aprd = 0; //use antiperiodic BCs in the time direction?
@ -325,4 +326,5 @@ int main (int argc, char ** argv)
Grid_finalize();
#endif
}

View File

@ -30,6 +30,7 @@ See the full license in the file "LICENSE" in the top level distribution directo
using namespace Grid;
#ifdef ENABLE_GPARITY
static constexpr double tolerance = 1.0e-6;
static std::array<GparityFlavourMatrix, GparityFlavour::nSigma> testAlgebra;
@ -148,11 +149,12 @@ void checkSigma(const GparityFlavour::Algebra a, GridSerialRNG &rng)
test(m*g, m*testg);
std::cout << std::endl;
}
#endif
int main(int argc, char *argv[])
{
Grid_init(&argc,&argv);
#ifdef ENABLE_GPARITY
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
@ -170,7 +172,7 @@ int main(int argc, char *argv[])
checkSigma(i, sRNG);
}
std::cout << GridLogMessage << std::endl;
#endif
Grid_finalize();
return EXIT_SUCCESS;

View File

@ -35,7 +35,7 @@ using namespace Grid;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
#ifdef ENABLE_GPARITY
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
@ -216,6 +216,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage <<"pDce - conj(cDpo) "<< pDco-conj(cDpo) <<std::endl;
std::cout<<GridLogMessage <<"pDco - conj(cDpe) "<< pDce-conj(cDpe) <<std::endl;
#endif
Grid_finalize();
}

View File

@ -93,7 +93,7 @@ void MemoryTest(GridCartesian * FGrid, int N)
if ( dev ) {
autoView(A_v,A[v],AcceleratorRead);
accelerator_for(ss,FGrid->oSites(),1,{
assert(B[v]==A_v[ss]()()().getlane(0));
// assert(B[v]==A_v[ss]()()().getlane(0));
});
// std::cout << "["<<v<<"] checked on GPU"<<B[v]<<std::endl;
} else {

View File

@ -23,8 +23,8 @@ template<class vobj> inline void sliceSumCPU(const Grid::Lattice<vobj> &Data,std
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
Vector<vobj> lvSum(rd); // will locally sum vectors first
Vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
std::vector<vobj> lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node

tests/core/Test_uvm.cc (new file, 106 lines)
View File

@ -0,0 +1,106 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_memory_manager.cc
Copyright (C) 2022
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
const int64_t Pages=32;
const int64_t PageWords=4096/sizeof(ComplexD);
const int64_t VecWords=PageWords*Pages;
const int64_t N=10000;
class Tester {
public:
Vector<ComplexD> zero_uvm;
std::vector<ComplexD> zero_host;
std::vector<Vector<ComplexD> > A;
std::vector<std::vector<ComplexD> > B;
uint64_t counter;
Tester() :
zero_uvm(VecWords,ComplexD(0.0)),
zero_host(VecWords,ComplexD(0.0)),
A(N,zero_uvm),
B(N,zero_host)
{ counter = 0; }
void MemoryTest(int N)
{
for(int epoch = 0;epoch<100000;epoch++){
int p = random() %Pages; // Which address/page to hit
int v = random() %N; // Which vec
int w = random() %2; // Write or read
int dev= random() %2; // On device?
// int e=1;
ComplexD zc = counter++;
if ( w ) {
B[v][p*PageWords] = B[v][p*PageWords] + zc;
if ( dev ) {
ComplexD *A_v=&A[v][0];
accelerator_for(ss,1,1,{
A_v[p*PageWords] = A_v[p*PageWords] + zc;
});
} else {
A[v][p*PageWords] = A[v][p*PageWords] + zc;
}
} else {
if ( dev ) {
ComplexD *A_v=&A[v][0];
ComplexD ref = B[v][p*PageWords];
std::cout << "Device compare "<<B[v][p*PageWords]<<std::endl;
accelerator_for(ss,1,1,{
assert(ref==A_v[p*PageWords]);
});
} else {
std::cout << "Host compare "<<B[v][p*PageWords]<<std::endl;
assert(B[v][p*PageWords]==A[v][p*PageWords]);
}
}
}
}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Tester test;
for(int i=0;i<N;i++){
std::cout << "============================"<<std::endl;
std::cout << "Epoch "<<i<<"/"<<N<<std::endl;
std::cout << "============================"<<std::endl;
test.MemoryTest(32);
}
Grid_finalize();
}

View File

@ -47,20 +47,20 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
void Op (const Field &in, Field &out){
std::cout << "Op: PVdag M "<<std::endl;
// std::cout << "Op: PVdag M "<<std::endl;
Field tmp(in.Grid());
_Mat.M(in,tmp);
_PV.Mdag(tmp,out);
}
void AdjOp (const Field &in, Field &out){
std::cout << "AdjOp: Mdag PV "<<std::endl;
// std::cout << "AdjOp: Mdag PV "<<std::endl;
Field tmp(in.Grid());
_PV.M(in,tmp);
_Mat.Mdag(tmp,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){
std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
// std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
Field tmp(in.Grid());
// _Mat.M(in,tmp);
// _PV.Mdag(tmp,out);
@ -83,14 +83,14 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
void Op (const Field &in, Field &out){
std::cout << "Op: PVdag M "<<std::endl;
// std::cout << "Op: PVdag M "<<std::endl;
Field tmp(in.Grid());
_Mat.M(in,tmp);
_PV.Mdag(tmp,out);
out = out + shift * in;
}
void AdjOp (const Field &in, Field &out){
std::cout << "AdjOp: Mdag PV "<<std::endl;
// std::cout << "AdjOp: Mdag PV "<<std::endl;
Field tmp(in.Grid());
_PV.M(tmp,out);
_Mat.Mdag(in,tmp);
@ -98,7 +98,7 @@ public:
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){
std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
// std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
Field tmp(in.Grid());
Op(in,tmp);
AdjOp(tmp,out);
@ -154,6 +154,8 @@ public:
// std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
double t;
// Fine Smoother
// out = in;
out = Zero();
t=-usecond();
_PreSmoother(in,out);
t+=usecond();
@ -172,6 +174,7 @@ public:
// Coarse correction
t=-usecond();
Csol = Zero();
_CoarseSolve(Csrc,Csol);
//Csol=Zero();
t+=usecond();
@ -191,6 +194,8 @@ public:
// Fine Smoother
t=-usecond();
// vec2=vec1;
vec2=Zero();
_PostSmoother(vec1,vec2);
t+=usecond();
std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;
@ -215,7 +220,8 @@ int main (int argc, char ** argv)
// Construct a coarsened grid
Coordinate clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/4;
clatt[d] = clatt[d]/2;
// clatt[d] = clatt[d]/4;
}
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
@ -244,7 +250,7 @@ int main (int argc, char ** argv)
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);
const int nbasis = 8;
const int nbasis = 20;
const int cb = 0 ;
LatticeFermion prom(FGrid);
@ -260,7 +266,25 @@ int main (int argc, char ** argv)
typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
PVdagM_t PVdagM(Ddwf,Dpv);
ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv);
// ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv); // 355
// ShiftedPVdagM_t ShiftedPVdagM(1.0,Ddwf,Dpv); // 246
// ShiftedPVdagM_t ShiftedPVdagM(0.5,Ddwf,Dpv); // 183
// ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 145
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 134
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 127 -- NULL space via inverse iteration
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 57 -- NULL space via inverse iteration; 3 iterations
// ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 57 , tighter inversion
// ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 49 iters
// ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 70 iters; asymmetric
// ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 58; Loosen coarse, tighten fine
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 56 ...
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 51 ... with 24 vecs
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 31 ... with 24 vecs and 2^4 blocking
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 43 ... with 16 vecs and 2^4 blocking, sloppier
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35 ... with 20 vecs and 2^4 blocking
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 35 ... with 20 vecs and 2^4 blocking, looser coarse
// ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv); // 64 ... with 20 vecs, Christoph setup, and 2^4 blocking, looser coarse
ShiftedPVdagM_t ShiftedPVdagM(0.01,Ddwf,Dpv); //
// Run power method on HOA??
@ -269,6 +293,7 @@ int main (int argc, char ** argv)
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
Subspace AggregatesPD(Coarse5d,FGrid,cb);
/*
AggregatesPD.CreateSubspaceChebyshev(RNG5,
PVdagM,
nbasis,
@ -278,6 +303,10 @@ int main (int argc, char ** argv)
200,
200,
0.0);
*/
AggregatesPD.CreateSubspaceGCR(RNG5,
PVdagM,
nbasis);
LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d);
LittleDiracOpPV.CoarsenOperator(PVdagM,AggregatesPD);
@ -334,12 +363,13 @@ int main (int argc, char ** argv)
///////////////////////////////////////
std::cout<<GridLogMessage<<"******************* "<<std::endl;
std::cout<<GridLogMessage<<" Coarse Grid Solve "<<std::endl;
std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
std::cout<<GridLogMessage<<"******************* "<<std::endl;
TrivialPrecon<CoarseVector> simple;
NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(1.0e-8, 100, LinOpCoarse,simple,10,10);
L2PGCR.Level(2);
// PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10);
PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(3.0e-2, 100, LinOpCoarse,simple,10,10);
L2PGCR.Level(3);
c_res=Zero();
L2PGCR(c_src,c_res);
@ -347,11 +377,12 @@ int main (int argc, char ** argv)
// Fine grid smoother
////////////////////////////////////////
std::cout<<GridLogMessage<<"******************* "<<std::endl;
std::cout<<GridLogMessage<<" Fine Grid Smoother "<<std::endl;
std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
std::cout<<GridLogMessage<<"******************* "<<std::endl;
TrivialPrecon<LatticeFermionD> simple_fine;
// NonHermitianLinearOperator<PVdagM_t,LatticeFermionD> LinOpSmooth(PVdagM);
PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,10,ShiftedPVdagM,simple_fine,4,4);
PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedPVdagM,simple_fine,16,16);
SmootherGCR.Level(2);
LatticeFermionD f_src(FGrid);
LatticeFermionD f_res(FGrid);
@ -364,12 +395,12 @@ int main (int argc, char ** argv)
TwoLevelMG TwoLevelPrecon(AggregatesPD,
PVdagM,
SmootherGCR,
simple_fine,
SmootherGCR,
LinOpCoarse,
L2PGCR);
PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,8,8);
PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,16,16);
L1PGCR.Level(1);
f_res=Zero();

View File

@ -1,7 +1,6 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/qdpxx/Test_qdpxx_munprec.cc
Copyright (C) 2015
@ -26,13 +25,17 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <chroma.h>
#include <actions/ferm/invert/syssolver_linop_cg_array.h>
#include <actions/ferm/invert/syssolver_linop_aggregate.h>
#include <Grid/Grid.h>
int Ls=8;
double M5=1.6;
double mq=0.01;
double zolo_lo = 0.1;
double zolo_hi = 2.0;
double zolo_lo = 0.01;
double zolo_hi = 7.0;
double mobius_scale=2.0;
enum ChromaAction {
@ -55,11 +58,6 @@ enum ChromaAction {
void calc_grid (ChromaAction action,Grid::LatticeGaugeField & lat, Grid::LatticeFermion &src, Grid::LatticeFermion &res,int dag);
void calc_chroma (ChromaAction action,Grid::LatticeGaugeField & lat, Grid::LatticeFermion &src, Grid::LatticeFermion &res,int dag);
#include <chroma.h>
#include <actions/ferm/invert/syssolver_linop_cg_array.h>
#include <actions/ferm/invert/syssolver_linop_aggregate.h>
namespace Chroma {
@ -81,7 +79,7 @@ public:
std::vector<int> x(4);
QDP::multi1d<int> cx(4);
std::vector<int> gd= gr.Grid()->GlobalDimensions();
Grid::Coordinate gd = gr.Grid()->GlobalDimensions();
for (x[0]=0;x[0]<gd[0];x[0]++){
for (x[1]=0;x[1]<gd[1];x[1]++){
@ -124,7 +122,7 @@ public:
std::vector<int> x(5);
QDP::multi1d<int> cx(4);
std::vector<int> gd= gr.Grid()->GlobalDimensions();
Grid::Coordinate gd= gr.Grid()->GlobalDimensions();
for (x[0]=0;x[0]<gd[0];x[0]++){
for (x[1]=0;x[1]<gd[1];x[1]++){
@ -166,7 +164,7 @@ public:
std::vector<int> x(5);
QDP::multi1d<int> cx(4);
std::vector<int> gd= gr.Grid()->GlobalDimensions();
Grid::Coordinate gd= gr.Grid()->GlobalDimensions();
for (x[0]=0;x[0]<gd[0];x[0]++){
for (x[1]=0;x[1]<gd[1];x[1]++){
@ -304,7 +302,30 @@ public:
// param.approximation_type=COEFF_TYPE_TANH_UNSCALED;
// param.approximation_type=COEFF_TYPE_TANH;
param.tuning_strategy_xml=
"<TuningStrategy><Name>OVEXT_CONSTANT_STRATEGY</Name></TuningStrategy>\n";
"<TuningStrategy><Name>OVEXT_CONSTANT_STRATEGY</Name><TuningConstant>1.0</TuningConstant></TuningStrategy>\n";
UnprecOvExtFermActArray S_f(cfs,param);
Handle< FermState<T4,U,U> > fs( S_f.createState(u) );
Handle< LinearOperatorArray<T4> > M(S_f.linOp(fs));
return M;
}
if ( parms == HwPartFracTanh ) {
if ( Ls%2 == 0 ) {
printf("Ls is not odd\n");
exit(-1);
}
UnprecOvExtFermActArrayParams param;
param.OverMass=M5;
param.Mass=_mq;
param.RatPolyDeg = Ls;
param.ApproxMin =eps_lo;
param.ApproxMax =eps_hi;
param.b5 =1.0;
param.c5 =1.0;
// param.approximation_type=COEFF_TYPE_ZOLOTAREV;
param.approximation_type=COEFF_TYPE_TANH_UNSCALED;
//param.approximation_type=COEFF_TYPE_TANH;
param.tuning_strategy_xml=
"<TuningStrategy><Name>OVEXT_CONSTANT_STRATEGY</Name><TuningConstant>1.0</TuningConstant></TuningStrategy>\n";
UnprecOvExtFermActArray S_f(cfs,param);
Handle< FermState<T4,U,U> > fs( S_f.createState(u) );
Handle< LinearOperatorArray<T4> > M(S_f.linOp(fs));
@ -316,7 +337,35 @@ public:
param.ApproxMin=eps_lo;
param.ApproxMax=eps_hi;
param.approximation_type=COEFF_TYPE_ZOLOTAREV;
param.RatPolyDeg=Ls;
param.RatPolyDeg=Ls-1;
// The following is why I think Chroma made some directional errors:
param.AuxFermAct= std::string(
"<AuxFermAct>\n"
" <FermAct>UNPRECONDITIONED_WILSON</FermAct>\n"
" <Mass>-1.8</Mass>\n"
" <b5>1</b5>\n"
" <c5>0</c5>\n"
" <MaxCG>1000</MaxCG>\n"
" <RsdCG>1.0e-9</RsdCG>\n"
" <FermionBC>\n"
" <FermBC>SIMPLE_FERMBC</FermBC>\n"
" <boundary>1 1 1 1</boundary>\n"
" </FermionBC> \n"
"</AuxFermAct>"
);
param.AuxFermActGrp= std::string("");
UnprecOvlapContFrac5DFermActArray S_f(fbc,param);
Handle< FermState<T4,U,U> > fs( S_f.createState(u) );
Handle< LinearOperatorArray<T4> > M(S_f.linOp(fs));
return M;
}
if ( parms == HwContFracTanh ) {
UnprecOvlapContFrac5DFermActParams param;
param.Mass=_mq; // How is M5 set? Wilson mass In AuxFermAct
param.ApproxMin=eps_lo;
param.ApproxMax=eps_hi;
param.approximation_type=COEFF_TYPE_TANH_UNSCALED;
param.RatPolyDeg=Ls-1;
// The following is why I think Chroma made some directional errors:
param.AuxFermAct= std::string(
"<AuxFermAct>\n"
@ -378,7 +427,14 @@ int main (int argc,char **argv )
* Setup QDP
*********************************************************/
Chroma::initialize(&argc,&argv);
Chroma::WilsonTypeFermActs4DEnv::registerAll();
// Chroma::WilsonTypeFermActs4DEnv::registerAll();
Chroma::WilsonTypeFermActsEnv::registerAll();
//bool linkageHack(void)
//{
// bool foo = true;
// Inline Measurements
// InlineAggregateEnv::registerAll();
// GaugeInitEnv::registerAll();
/********************************************************
* Setup Grid
@ -388,26 +444,34 @@ int main (int argc,char **argv )
Grid::GridDefaultSimd(Grid::Nd,Grid::vComplex::Nsimd()),
Grid::GridDefaultMpi());
std::vector<int> gd = UGrid->GlobalDimensions();
Grid::Coordinate gd = UGrid->GlobalDimensions();
QDP::multi1d<int> nrow(QDP::Nd);
for(int mu=0;mu<4;mu++) nrow[mu] = gd[mu];
QDP::Layout::setLattSize(nrow);
QDP::Layout::create();
Grid::GridCartesian * FGrid = Grid::SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
Grid::LatticeGaugeField lat(UGrid);
Grid::LatticeFermion src(FGrid);
Grid::LatticeFermion res_chroma(FGrid);
Grid::LatticeFermion res_grid (FGrid);
std::vector<ChromaAction> ActionList({
HtCayleyTanh, // Plain old DWF.
HmCayleyTanh,
HwCayleyTanh,
HtCayleyZolo, // Plain old DWF.
HmCayleyZolo,
HwCayleyZolo
HwCayleyZolo,
HwPartFracZolo,
HwContFracZolo,
HwContFracTanh
});
std::vector<int> LsList({
8,//HtCayleyTanh, // Plain old DWF.
8,//HmCayleyTanh,
8,//HwCayleyTanh,
8,//HtCayleyZolo, // Plain old DWF.
8,//HmCayleyZolo,
8,//HwCayleyZolo,
9,//HwPartFracZolo
9, //HwContFracZolo
9 //HwContFracTanh
});
std::vector<std::string> ActionName({
"HtCayleyTanh",
@ -415,10 +479,19 @@ int main (int argc,char **argv )
"HwCayleyTanh",
"HtCayleyZolo",
"HmCayleyZolo",
"HwCayleyZolo"
"HwCayleyZolo",
"HwPartFracZolo",
"HwContFracZolo",
"HwContFracTanh"
});
for(int i=0;i<ActionList.size();i++) {
Ls = LsList[i];
Grid::GridCartesian * FGrid = Grid::SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
Grid::LatticeGaugeField lat(UGrid);
Grid::LatticeFermion src(FGrid);
Grid::LatticeFermion res_chroma(FGrid);
Grid::LatticeFermion res_grid (FGrid);
std::cout << "*****************************"<<std::endl;
std::cout << "Action "<<ActionName[i]<<std::endl;
std::cout << "*****************************"<<std::endl;
@ -439,6 +512,7 @@ int main (int argc,char **argv )
std::cout << "Norm of difference "<<Grid::norm2(res_chroma)<<std::endl;
}
delete FGrid;
}
std::cout << "Finished test "<<std::endl;
@ -502,7 +576,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
Grid::gaussian(RNG5,src);
Grid::gaussian(RNG5,res);
Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
Grid::SU<Grid::Nc>::HotConfiguration(RNG4,Umu);
/*
Grid::LatticeColourMatrix U(UGrid);
@ -519,7 +593,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
if ( action == HtCayleyTanh ) {
Grid::DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5);
Grid::DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5);
std::cout << Grid::GridLogMessage <<" Calling domain wall multiply "<<std::endl;
@ -535,7 +609,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
Grid::Real _b = 0.5*(mobius_scale +1.0);
Grid::Real _c = 0.5*(mobius_scale -1.0);
Grid::MobiusZolotarevFermionR D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,_b,_c,zolo_lo,zolo_hi);
Grid::MobiusZolotarevFermionD D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,_b,_c,zolo_lo,zolo_hi);
std::cout << Grid::GridLogMessage <<" Calling mobius zolo multiply "<<std::endl;
@ -549,7 +623,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
if ( action == HtCayleyZolo ) {
Grid::ShamirZolotarevFermionR D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,zolo_lo,zolo_hi);
Grid::ShamirZolotarevFermionD D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,zolo_lo,zolo_hi);
std::cout << Grid::GridLogMessage <<" Calling shamir zolo multiply "<<std::endl;
@ -561,6 +635,60 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
return;
}
if ( action == HwPartFracTanh ) {
Grid::OverlapWilsonPartialFractionTanhFermionD Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,1.0);
std::cout << Grid::GridLogMessage <<" Calling part frac tanh multiply "<<std::endl;
if ( dag )
Dov.Mdag(src,res);
else
Dov.M(src,res);
return;
}
if ( action == HwContFracTanh ) {
Grid::OverlapWilsonContFracTanhFermionD Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,1.0);
std::cout << Grid::GridLogMessage <<" Calling cont frac tanh multiply "<<std::endl;
if ( dag )
Dov.Mdag(src,res);
else
Dov.M(src,res);
return;
}
if ( action == HwContFracZolo ) {
Grid::OverlapWilsonContFracZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,zolo_lo,zolo_hi);
std::cout << Grid::GridLogMessage <<" Calling cont frac zolo multiply "<<std::endl;
if ( dag )
Dov.Mdag(src,res);
else
Dov.M(src,res);
return;
}
if ( action == HwPartFracZolo ) {
Grid::OverlapWilsonPartialFractionZolotarevFermionD Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,zolo_lo,zolo_hi);
std::cout << Grid::GridLogMessage <<" Calling part frac zolotarev multiply "<<std::endl;
if ( dag )
Dov.Mdag(src,res);
else
Dov.M(src,res);
return;
}
/*
if ( action == HmCayleyTanh ) {
Grid::Real _b = 0.5*(mobius_scale +1.0);
@ -581,7 +709,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
if ( action == HmCayleyTanh ) {
Grid::ScaledShamirFermionR D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,mobius_scale);
Grid::ScaledShamirFermionD D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,mobius_scale);
std::cout << Grid::GridLogMessage <<" Calling scaled shamir multiply "<<std::endl;
@ -595,7 +723,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
if ( action == HwCayleyTanh ) {
Grid::OverlapWilsonCayleyTanhFermionR D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,1.0);
Grid::OverlapWilsonCayleyTanhFermionD D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,1.0);
if ( dag )
D.Mdag(src,res);
@ -607,7 +735,7 @@ void calc_grid(ChromaAction action,Grid::LatticeGaugeField & Umu, Grid::LatticeF
if ( action == HwCayleyZolo ) {
Grid::OverlapWilsonCayleyZolotarevFermionR D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,zolo_lo,zolo_hi);
Grid::OverlapWilsonCayleyZolotarevFermionD D(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,_mass,_M5,zolo_lo,zolo_hi);
if ( dag )
D.Mdag(src,res);

View File

@ -1,4 +1,4 @@
/*************************************************************************************
*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -67,7 +67,13 @@ int main(int argc, char** argv) {
result = Zero();
LatticeGaugeField Umu(UGrid);
#if 0
FieldMetaData header;
std::string file("ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file);
#else
SU<Nc>::HotConfiguration(RNG4, Umu);
#endif
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
<< " Ls: " << Ls << std::endl;

View File

@ -54,15 +54,30 @@ int main (int argc, char ** argv)
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<ComplexD> qmu;
qmu.push_back(ComplexD(0.1,0.0));
qmu.push_back(ComplexD(0.0,0.0));
qmu.push_back(ComplexD(0.0,0.0));
qmu.push_back(ComplexD(0.0,0.01));
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeFermion tmp(FGrid);
LatticeFermion src(FGrid); random(RNG5,src);
LatticeFermion result(FGrid); result=Zero();
LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
LatticeGaugeField Umu(UGrid);
#if 0
FieldMetaData header;
std::string file("ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file);
#else
SU<Nc>::HotConfiguration(RNG4,Umu);
#endif
std::vector<LatticeColourMatrix> U(4,UGrid);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
@ -71,8 +86,15 @@ int main (int argc, char ** argv)
RealD mass=0.1;
RealD M5=1.8;
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
Ddwf.qmu = qmu;
Ddwf.M(src,tmp);
std::cout << " |M src|^2 "<<norm2(tmp)<<std::endl;
MdagMLinearOperator<DomainWallFermionD,LatticeFermion> HermOp(Ddwf);
HermOp.HermOp(src,tmp);
std::cout << " <src|MdagM| src> "<<innerProduct(src,tmp)<<std::endl;
ConjugateGradient<LatticeFermion> CG(1.0e-6,10000);
CG(HermOp,src,result);

View File

@ -87,8 +87,8 @@ static void run_generators_checks() {
typedef typename Sp_TwoIndex<this_nc, S>::template iGroupMatrix<Complex> Matrix;
int sum = 0;
int sum_im = 0;
Vector<Matrix> ta_fund(this_algebra_dim);
Vector<Matrix> eij(this_irrep_dim);
std::vector<Matrix> ta_fund(this_algebra_dim);
std::vector<Matrix> eij(this_irrep_dim);
Matrix tmp_l;
Matrix tmp_r;
for (int n = 0; n < this_algebra_dim; n++)