Merge branch 'feature/gparity_HMC' of https://github.com/paboyle/Grid into gparity_HMC

2026-03-04 19:46:13 +00:00 · 2021-11-16 07:32:47 -08:00
parent a1211cdcce e78acf77ff
commit 6d26a2a1ad
4 changed files with 900 additions and 9 deletions
--- a/Grid/algorithms/iterative/LocalCoherenceLanczos.h
+++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h
@@ -44,6 +44,7 @@ public:
 				  int, MinRes);    // Must restart
 };

+//This class is the input parameter class for some testing programs
 struct LocalCoherenceLanczosParams : Serializable {
 public:
  GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
@@ -153,6 +154,7 @@ public:
      _coarse_relax_tol(coarse_relax_tol)  
  {    };

+  //evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
  int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
  {
    CoarseField v(B);
@@ -179,8 +181,16 @@ public:
    if( (vv<eresid*eresid) ) conv = 1;
    return conv;
  }
-  int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
+
+  //This function is called at the end of the coarse grid Lanczos. It promotes the coarse eigenvector 'B' to the fine grid,
+  //applies a smoother to the result, then computes the *fine grid* eigenvalue (output as 'eval').
+
+  //evalMaxApprox should be the approximation of the largest eval of the fine Hermop. However when this function is called by IRL it actually passes the largest eval of the *Chebyshev* operator (as this is the max approx used for the TestConvergence above)
+  //As the largest eval of the Chebyshev is typically several orders of magnitude larger this makes the convergence test pass even when it should not.
+  //We therefore ignore evalMaxApprox here and use a value of 1.0 (note this value is already used by testCoarse)
+  int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)  
  {
+    evalMaxApprox = 1.0; //cf above
    GridBase *FineGrid = _subspace[0].Grid();    
    int checkerboard   = _subspace[0].Checkerboard();
    FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
@@ -199,13 +209,13 @@ public:
    eval   = vnum/vden;
    fv -= eval*fB;
    RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
-
+    if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
+    
    std::cout.precision(13);
    std::cout<<GridLogIRL  << "[" << std::setw(3)<<j<<"] "
 	     <<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
-	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
+	     <<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv << " target " << eresid*eresid
 	     <<std::endl;
-    if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
    if( (vv<eresid*eresid) ) return 1;
    return 0;
  }
@@ -283,6 +293,10 @@ public:
    evals_coarse.resize(0);
  };

+  //The block inner product is the inner product on the fine grid locally summed over the blocks
+  //to give a Lattice<Scalar> on the coarse grid. This function orthonormalizes the fine-grid subspace
+  //vectors under the block inner product. This step must be performed after computing the fine grid
+  //eigenvectors and before computing the coarse grid eigenvectors.    
  void Orthogonalise(void ) {
    CoarseScalar InnerProd(_CoarseGrid);
    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
@@ -326,6 +340,8 @@ public:
    }
  }

+  //While this method serves to check the coarse eigenvectors, it also recomputes the eigenvalues from the smoothed reconstructed eigenvectors
+  //hence the smoother can be tuned after running the coarse Lanczos by using a different smoother here
  void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax) 
  {
    assert(evals_fine.size() == nbasis);
@@ -374,25 +390,31 @@ public:
    evals_fine.resize(nbasis);
    subspace.resize(nbasis,_FineGrid);
  }
+
+
+  //cheby_op: Parameters of the fine grid Chebyshev polynomial used for the Lanczos acceleration
+  //cheby_smooth: Parameters of a separate Chebyshev polynomial used after the Lanczos has completed to smooth out high frequency noise in the reconstructed fine grid eigenvectors prior to computing the eigenvalue
+  //relax: Reconstructed eigenvectors (post smoothing) are naturally not as precise as true eigenvectors. This factor acts as a multiplier on the stopping condition when determining whether the results satisfy the user provided stopping condition
  void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
 		  int Nstop, int Nk, int Nm,RealD resid, 
 		  RealD MaxIt, RealD betastp, int MinRes)
  {
-    Chebyshev<FineField>                          Cheby(cheby_op);
-    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,subspace);
-    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
+    Chebyshev<FineField>                          Cheby(cheby_op); //Chebyshev of fine operator on fine grid
+    ProjectedHermOp<Fobj,CComplex,nbasis>         Op(_FineOp,subspace); //Fine operator on coarse grid with intermediate fine grid conversion
+    ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace); //Chebyshev of fine operator on coarse grid with intermediate fine grid conversion
    //////////////////////////////////////////////////////////////////////////////////////////////////
    // create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
    //////////////////////////////////////////////////////////////////////////////////////////////////

-    Chebyshev<FineField>                                           ChebySmooth(cheby_smooth);
-    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
+    Chebyshev<FineField>                                           ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
+    ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax); 

    evals_coarse.resize(Nm);
    evec_coarse.resize(Nm,_CoarseGrid);

    CoarseField src(_CoarseGrid);     src=1.0; 

+    //Note the "tester" here is also responsible for generating the fine grid eigenvalues which are output into the "evals_coarse" array
    ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
    int Nconv=0;
    IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
@@ -403,6 +425,14 @@ public:
      std::cout << i << " Coarse eval = " << evals_coarse[i]  << std::endl;
    }
  }
+
+  //Get the fine eigenvector 'i' by reconstruction
+  void getFineEvecEval(FineField &evec, RealD &eval, const int i) const{
+    blockPromote(evec_coarse[i],evec,subspace);  
+    eval = evals_coarse[i];
+  }
+    
+    
 };

 NAMESPACE_END(Grid);
--- a/Grid/threads/Accelerator.cc
+++ b/Grid/threads/Accelerator.cc
@@ -91,6 +91,8 @@ void acceleratorInit(void)
  printf("AcceleratorCudaInit: Configure options --enable-setdevice=yes \n");
  cudaSetDevice(rank);
 #endif
+
+  cudaStreamCreate(&copyStream);
  if ( world_rank == 0 )  printf("AcceleratorCudaInit: ================================================\n");
 }
 #endif
--- a/tests/lanczos/Test_compressed_lanczos_gparity.cc
+++ b/tests/lanczos/Test_compressed_lanczos_gparity.cc
@@ -0,0 +1,425 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/lanczos/Test_compressed_lanczos_gparity.cc
+
+    Copyright (C) 2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Leans heavily on Christoph Lehner's code
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+/*
+ *  Reimplement the badly named "multigrid" Lanczos as a compressed Lanczos, using the features
+ *  in Grid that were intended to support blocked aggregates, here for G-parity domain wall fermions.
+ */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
+
+using namespace std;
+using namespace Grid;
+
+// template<class VectorInt>
+// void GridCmdOptionIntVector(const std::string &str, VectorInt & vec)
+// {
+//   vec.resize(0);
+//   std::stringstream ss(str);
+//   int i;
+//   while (ss >> i){
+//     vec.push_back(i);
+//     if(std::ispunct(ss.peek()))
+//       ss.ignore();
+//   }
+//   return;
+// }
+
+
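+//Read a gauge configuration via NerscIO. For CPS configurations (is_cps_cfg) the plaquette in the metadata is wrong by a factor of 2, so the exit-on-plaquette-mismatch check is disabled for the read and re-enabled afterwards (no RNG seeding is done here)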
+void readConfiguration(LatticeGaugeFieldD &U,
+		       const std::string &config,
+		       bool is_cps_cfg = false){
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+  typedef GaugeStatistics<ConjugateGimplD> GaugeStats;
+     
+  FieldMetaData header;
+  NerscIO::readConfiguration<GaugeStats>(U, header, config);
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
+}
+
+//Lanczos parameters in CPS conventions
+struct CPSLanczosParams : Serializable {
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, ch_ord,
+				  int, N_use,
+				  int, N_get,
+				  int, N_true_get,
+				  RealD, stop_rsd,
+				  int, maxits);
+
+  //Translations
+  ChebyParams getChebyParams() const{
+    ChebyParams out;
+    out.alpha = beta*beta; //aka lo
+    out.beta = alpha*alpha; //aka hi
+    out.Npoly = ch_ord+1;
+    return out;
+  }
+  int Nstop() const{ return N_true_get; }
+  int Nm() const{ return N_use; }
+  int Nk() const{ return N_get; }
+};
+
+//Maybe this class should be in the main library?
+template<class Fobj,class CComplex,int nbasis>
+class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
+{ 
+public:
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
+  typedef Lattice<Fobj>          FineField;
+
+  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
+			      LinearOperatorBase<FineField> &FineOp,
+			      int checkerboard) 
+    // Base constructor
+    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard) 
+  {};
+
+  void checkpointFine(std::string evecs_file,std::string evals_file)
+  {
+    assert(this->subspace.size()==nbasis);
+    emptyUserRecord record;
+    Grid::ScidacWriter WR(this->_FineGrid->IsBoss());
+    WR.open(evecs_file);
+    for(int k=0;k<nbasis;k++) {
+      WR.writeScidacFieldRecord(this->subspace[k],record);
+    }
+    WR.close();
+    
+    XmlWriter WRx(evals_file);
+    write(WRx,"evals",this->evals_fine);
+  }
+
+  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
+  {
+    this->evals_fine.resize(nbasis);
+    this->subspace.resize(nbasis,this->_FineGrid);
+    
+    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",this->evals_fine);
+    
+    assert(this->evals_fine.size()==nbasis);
+    
+    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
+    emptyUserRecord record;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<nbasis;k++) {
+      this->subspace[k].Checkerboard()=this->_checkerboard;
+      RD.readScidacFieldRecord(this->subspace[k],record);
+      
+    }
+    RD.close();
+  }
+
+  void checkpointCoarse(std::string evecs_file,std::string evals_file)
+  {
+    int n = this->evec_coarse.size();
+    emptyUserRecord record;
+    Grid::ScidacWriter WR(this->_CoarseGrid->IsBoss());
+    WR.open(evecs_file);
+    for(int k=0;k<n;k++) {
+      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
+    }
+    WR.close();
+    
+    XmlWriter WRx(evals_file);
+    write(WRx,"evals",this->evals_coarse);
+  }
+
+  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
+  {
+    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
+    this->evals_coarse.resize(nvec);
+    this->evec_coarse.resize(nvec,this->_CoarseGrid);
+    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",this->evals_coarse);
+
+    assert(this->evals_coarse.size()==nvec);
+    emptyUserRecord record;
+    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<nvec;k++) {
+      RD.readScidacFieldRecord(this->evec_coarse[k],record);
+    }
+    RD.close();
+  }
+};
+
+
+//Note:  because we rely upon physical properties we must use a "real" gauge configuration
+int main (int argc, char ** argv) {
+  Grid_init(&argc,&argv);
+  GridLogIRL.TimingMode(1);
+
+  std::vector<int> blockSize = {2,2,2,2,2};
+  std::vector<int> GparityDirs = {1,1,1}; //1 for each GP direction
+
+  int Ls = 12;
+  RealD mass = 0.01;
+  RealD M5 = 1.8;
+  bool is_cps_cfg = false;
+
+  CPSLanczosParams fine, coarse;
+
+  fine.alpha = 2;
+  fine.beta = 0.1;
+  fine.ch_ord = 100;
+  fine.N_use = 70;
+  fine.N_get = 60;
+  fine.N_true_get = 60;
+  fine.stop_rsd = 1e-8;
+  fine.maxits = 10000;
+
+  coarse.alpha = 2;
+  coarse.beta = 0.1;
+  coarse.ch_ord = 100;
+  coarse.N_use = 200;
+  coarse.N_get = 190;
+  coarse.N_true_get = 190;
+  coarse.stop_rsd = 1e-8;
+  coarse.maxits = 10000;
+
+  double coarse_relax_tol = 1e5;
+  int smoother_ord = 20;
+  
+  if(argc < 3){
+    std::cout << GridLogMessage << "Usage: <exe> <config> <gparity dirs> <options>" << std::endl;
+    std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
+    std::cout << GridLogMessage << "Options:" << std::endl;
+    std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
+    std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
+    std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents  (default 2.2.2.2.2)" << std::endl;
+    std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
+    std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_coarse <filename>: Real the parameters file for the coarse Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--write_coarse <filename stub>: Write coarse evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_coarse <filename stub>: Read coarse evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--smoother_ord :  Set the Chebyshev order of the smoother (default 20)" << std::endl;
+    std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
+    Grid_finalize();
+    return 1;
+  }
+  std::string config = argv[1];
+  GridCmdOptionIntVector(argv[2], GparityDirs);
+  assert(GparityDirs.size() == 3);
+
+  bool write_fine = false;
+  std::string write_fine_file;
+
+  bool read_fine = false;
+  std::string read_fine_file;
+
+  bool write_coarse = false;
+  std::string write_coarse_file;
+
+  bool read_coarse = false;
+  std::string read_coarse_file;
+ 
+  for(int i=3;i<argc;i++){
+    std::string sarg = argv[i];
+    if(sarg == "--Ls"){
+      Ls = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set Ls to " << Ls << std::endl;
+    }else if(sarg == "--mass"){
+      std::istringstream ss(argv[i+1]); ss >> mass;
+      std::cout << GridLogMessage << "Set quark mass to " << mass << std::endl;
+    }else if(sarg == "--block"){
+      GridCmdOptionIntVector(argv[i+1], blockSize);
+      assert(blockSize.size() == 5);
+      std::cout << GridLogMessage << "Set block size to ";
+      for(int q=0;q<5;q++) std::cout << blockSize[q] << " ";
+      std::cout << std::endl;      
+    }else if(sarg == "--is_cps_cfg"){
+      is_cps_cfg = true;
+    }else if(sarg == "--write_irl_templ"){
+      XmlWriter writer("irl_templ.xml");
+      write(writer,"Params",fine);
+      Grid_finalize();
+      return 0;
+    }else if(sarg == "--read_irl_fine"){
+      std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", fine);
+    }else if(sarg == "--read_irl_coarse"){
+      std::cout << GridLogMessage << "Reading coarse IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", coarse);
+    }else if(sarg == "--write_fine"){
+      write_fine = true;
+      write_fine_file = argv[i+1];
+    }else if(sarg == "--read_fine"){
+      read_fine = true;
+      read_fine_file = argv[i+1];
+    }else if(sarg == "--write_coarse"){
+      write_coarse = true;
+      write_coarse_file = argv[i+1];
+    }else if(sarg == "--read_coarse"){
+      read_coarse = true;
+      read_coarse_file = argv[i+1];
+    }else if(sarg == "--smoother_ord"){
+      std::istringstream ss(argv[i+1]); ss >> smoother_ord;
+      std::cout << GridLogMessage << "Set smoother order to " << smoother_ord << std::endl;
+    }else if(sarg == "--coarse_relax_tol"){
+      std::istringstream ss(argv[i+1]); ss >> coarse_relax_tol;
+      std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << coarse_relax_tol << std::endl;
+    }      
+  }
+  
+  //Fine grids
+  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  GridDefaultSimd(Nd,vComplex::Nsimd()),   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  //Setup G-parity BCs
+  assert(Nd == 4);
+  std::vector<int> dirs4(4);
+  for(int i=0;i<3;i++) dirs4[i] = GparityDirs[i];
+  dirs4[3] = 0; //periodic gauge BC in time
+  
+  std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
+  ConjugateGimplD::setDirections(dirs4); //gauge BC
+
+  GparityWilsonImplD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+  std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
+  
+  //Read the gauge field
+  LatticeGaugeField Umu(UGrid);  
+  readConfiguration(Umu, config, is_cps_cfg);
+
+  //Setup the coarse grids  
+  auto fineLatt     = GridDefaultLatt();
+  Coordinate coarseLatt(4);
+  for (int d=0;d<4;d++){
+    coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
+  }
+
+  std::cout << GridLogMessage<< " 5d coarse lattice is ";
+  for (int i=0;i<4;i++){
+    std::cout << coarseLatt[i]<<"x";
+  } 
+  int cLs = Ls/blockSize[4]; assert(cLs*blockSize[4]==Ls);
+  std::cout << cLs<<std::endl;
+  
+  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
+  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
+
+  //Dirac operator
+  GparityDomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params);
+  typedef GparityDomainWallFermionD::FermionField FermionField;
+  
+  SchurDiagTwoOperator<GparityDomainWallFermionD,FermionField> SchurOp(action);
+
+  typedef GparityWilsonImplD::SiteSpinor SiteSpinor;
+
+  std::cout << GridLogMessage << "Keep " << fine.N_true_get   << " fine   vectors" << std::endl;
+  std::cout << GridLogMessage << "Keep " << coarse.N_true_get << " coarse vectors" << std::endl;
+  assert(coarse.N_true_get >= fine.N_true_get);
+
+  const int nbasis= 60;
+  assert(nbasis<=fine.N_true_get);
+  LocalCoherenceLanczosScidac<SiteSpinor,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,SchurOp,Odd);
+  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
+
+  //Compute and/or read fine evecs
+  if(read_fine){
+    _LocalCoherenceLanczos.checkpointFineRestore(read_fine_file + "_evecs.scidac", read_fine_file + "_evals.xml");
+  }else{
+    std::cout << GridLogMessage << "Performing fine grid IRL" << std::endl;
+    std::cout << GridLogMessage << "Using Chebyshev alpha=" << fine.alpha << " beta=" << fine.beta << " ord=" << fine.ch_ord << std::endl;
+    _LocalCoherenceLanczos.calcFine(fine.getChebyParams(),
+				    fine.Nstop(),fine.Nk(),fine.Nm(),
+				    fine.stop_rsd,fine.maxits,0,0);
+    if(write_fine){
+      std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
+      _LocalCoherenceLanczos.checkpointFine(write_fine_file + "_evecs.scidac", write_fine_file + "_evals.xml");
+    }
+  }
+  
+  //Block orthonormalise (this should be part of calcFine?)
+  std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
+  _LocalCoherenceLanczos.Orthogonalise();
+  std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
+
+  ChebyParams smoother = fine.getChebyParams();
+  smoother.Npoly = smoother_ord+1;
+
+  if(read_coarse){
+    _LocalCoherenceLanczos.checkpointCoarseRestore(read_coarse_file + "_evecs.scidac", read_coarse_file + "_evals.xml",coarse.Nstop());
+
+  }else{
+    std::cout << GridLogMessage << "Performing coarse grid IRL" << std::endl;
+    std::cout << GridLogMessage << "Using Chebyshev alpha=" << coarse.alpha << " beta=" << coarse.beta << " ord=" << coarse.ch_ord << std::endl;	
+    _LocalCoherenceLanczos.calcCoarse(coarse.getChebyParams(), smoother, coarse_relax_tol,
+				      coarse.Nstop(), coarse.Nk() ,coarse.Nm(),
+				      coarse.stop_rsd, coarse.maxits, 
+				      0,0);
+
+    if(write_coarse){
+      std::cout << GridLogIRL<<"Checkpointing Coarse evecs"<<std::endl;
+      _LocalCoherenceLanczos.checkpointCoarse(write_coarse_file + "_evecs.scidac", write_coarse_file + "_evals.xml");
+    }
+
+  }
+
+  //Test the eigenvectors
+  FermionField evec(FrbGrid);
+  FermionField tmp(FrbGrid);
+  RealD eval;
+  
+  for(int i=0;i<coarse.N_true_get;i++){    
+    _LocalCoherenceLanczos.getFineEvecEval(evec, eval, i);
+    SchurOp.HermOp(evec, tmp);
+    tmp = tmp - eval*evec;
+    std::cout << GridLogMessage << "Eval " << eval << " resid " << sqrt(norm2(tmp)) << std::endl;
+  }
+
+  Grid_finalize();
+}
+
--- a/tests/lanczos/Test_evec_compression.cc
+++ b/tests/lanczos/Test_evec_compression.cc
@@ -0,0 +1,434 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/lanczos/Test_evec_compression.cc
+
+    Copyright (C) 2017
+
+Author: Christopher Kelly <ckelly@bnl.gov>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+/*
+ *
+ * This test generates eigenvectors using the Lanczos algorithm then attempts to use local coherence compression
+ * to express those vectors in terms of a basis formed from a subset. This test is useful for finding the optimal
+ * blocking and basis size for performing a Local Coherence Lanczos
+ */
+#include <Grid/Grid.h>
+#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
+
+using namespace std;
+using namespace Grid;
+
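+//Read a gauge configuration via NerscIO. For CPS configurations (is_cps_cfg) the plaquette in the metadata is wrong by a factor of 2, so the exit-on-plaquette-mismatch check is disabled for the read and re-enabled afterwards (no RNG seeding is done here)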
+void readConfiguration(LatticeGaugeFieldD &U,
+		       const std::string &config,
+		       bool is_cps_cfg = false){
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
+
+  typedef GaugeStatistics<ConjugateGimplD> GaugeStats;
+     
+  FieldMetaData header;
+  NerscIO::readConfiguration<GaugeStats>(U, header, config);
+
+  if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
+}
+
+//Lanczos parameters in CPS conventions
+struct CPSLanczosParams : Serializable {
+public:
+  GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
+				  RealD, alpha,
+				  RealD, beta,
+				  int, ch_ord,
+				  int, N_use,
+				  int, N_get,
+				  int, N_true_get,
+				  RealD, stop_rsd,
+				  int, maxits);
+
+  //Translations
+  ChebyParams getChebyParams() const{
+    ChebyParams out;
+    out.alpha = beta*beta; //aka lo
+    out.beta = alpha*alpha; //aka hi
+    out.Npoly = ch_ord+1;
+    return out;
+  }
+  int Nstop() const{ return N_true_get; }
+  int Nm() const{ return N_use; }
+  int Nk() const{ return N_get; }
+};
+
+
+template<class Fobj,class CComplex,int nbasis>
+class LocalCoherenceCompressor{
+public:
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+  typedef Lattice<Fobj>                       FineField;
+  
+  void compress(std::vector<FineField> &basis,
+		std::vector<CoarseField> &compressed_evecs,
+		const std::vector<FineField> &evecs_in,
+		GridBase *FineGrid,
+		GridBase *CoarseGrid){
+    int nevecs = evecs_in.size();
+    assert(nevecs > nbasis);
+    
+    //Construct the basis
+    basis.resize(nbasis, FineGrid);
+    for(int b=0;b<nbasis;b++) basis[b] = evecs_in[b];
+
+    //Block orthonormalize the basis (two Gram-Schmidt passes)
+    CoarseScalar InnerProd(CoarseGrid);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
+    blockOrthogonalise(InnerProd,basis);
+    std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
+    blockOrthogonalise(InnerProd,basis);
+
+    //The coarse grid representation is the field of vectors of block inner products
+    std::cout << GridLogMessage << "Compressing eigevectors" << std::endl;
+    compressed_evecs.resize(nevecs, CoarseGrid);
+    for(int i=0;i<nevecs;i++) blockProject(compressed_evecs[i], evecs_in[i], basis);
+    std::cout << GridLogMessage << "Compression complete" << std::endl;
+  }
+
+  void uncompress(FineField &evec, const int i, const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs) const{
+    blockPromote(compressed_evecs[i],evec,basis);  
+  }
+
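+  //Test uncompressed eigenvectors of Linop.HermOp to precision 'base_tolerance' for i<nbasis and 'base_tolerance*relax' for i>=nbasis. Returns true if any residual |HermOp(evec) - eval*evec| exceeds its tolerance (i.e. true means FAILURE)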
+  bool testCompression(LinearOperatorBase<FineField> &Linop,
+		       const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<RealD> &evals,
+		       const RealD base_tolerance, const RealD relax){
+    GridBase* FineGrid = basis[0].Grid();
+    GridBase* CoarseGrid = compressed_evecs[0].Grid();
+
+    bool fail = false;
+    FineField evec(FineGrid), Mevec(FineGrid);
+    for(int i=0;i<compressed_evecs.size();i++){
+      std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
+      uncompress(evec, i, basis, compressed_evecs);
+
+      std::cout << GridLogMessage << "Computing residual for evec " << i << std::endl;
+      std::cout << GridLogMessage << "Linop" << std::endl;
+      Linop.HermOp(evec, Mevec);
+      std::cout << GridLogMessage << "Linalg" << std::endl;
+      Mevec = Mevec - evals[i]*evec;
+
+      std::cout << GridLogMessage << "Resid" << std::endl;
+      RealD tol = base_tolerance * (i<nbasis ? 1. : relax);
+      RealD res = sqrt(norm2(Mevec));
+      std::cout << GridLogMessage << "Evec idx " << i << " res " << res << " tol " << tol << std::endl;
+      if(res > tol) fail = true;
+    }
+    return fail;
+  }
+};
+
+//Time two successive "cold" calls to blockPromote on the first compressed eigenvector.
+//  basis            : fine-grid basis vectors passed to blockPromote
+//  compressed_evecs : coarse-grid fields; only element 0 is promoted
+//Before each timed call a CpuWrite view is opened (and immediately closed) on every basis vector, every compressed
+//eigenvector and the output field. The intent, per the original author, is to force a cold start; presumably the host
+//write access invalidates any cached device copy so the data must be moved again (TODO confirm against the memory manager).
+//The two reported times are expected to agree if the cold start is genuine.
+template<class Fobj,class CComplex,int nbasis>
+void compareBlockPromoteTimings(const std::vector<Lattice<Fobj> > &basis, const std::vector<Lattice<iVector<CComplex,nbasis > > > &compressed_evecs){
+  typedef Lattice<Fobj>                       FineField;
+
+  //Element 0 of both containers is dereferenced below
+  assert(!basis.empty());
+  assert(!compressed_evecs.empty());
+
+  GridStopWatch timer;
+  
+  GridBase* FineGrid = basis[0].Grid();
+
+  FineField v1(FineGrid);
+
+  //Touch every field involved in the promotion with a host write view so that the next blockPromote starts cold
+  auto makeCold = [&](){
+    for(size_t i=0;i<basis.size();i++){
+      autoView( b_ , basis[i], CpuWrite);
+    }
+    for(size_t i=0;i<compressed_evecs.size();i++){
+      autoView( b_ , compressed_evecs[i], CpuWrite);
+    }
+    {
+      autoView( b_, v1, CpuWrite );
+    }
+  };
+
+  //Start with a cold start
+  makeCold();
+
+  timer.Start();
+  blockPromote(compressed_evecs[0],v1,basis);  
+  timer.Stop();
+  std::cout << GridLogMessage << "Time for cold blockPromote v1 " << timer.Elapsed() << std::endl;
+
+  //Test to ensure it is actually doing a cold start by repeating
+  makeCold();
+
+  timer.Reset();
+  timer.Start();
+  blockPromote(compressed_evecs[0],v1,basis);  
+  timer.Stop();
+  std::cout << GridLogMessage << "Time for cold blockPromote v1 repeat (should be the same as above) " << timer.Elapsed() << std::endl;
+}
+
+
+  
+
+//Test program for the local-coherence compression of G-parity domain wall eigenvectors:
+//  1) obtain the fine-grid eigenvectors/eigenvalues of the Schur operator (run the Lanczos or read them from disk)
+//  2) compress them using a basis built from the first 'nbasis' eigenvectors
+//  3) time blockPromote and check the residuals of the reconstructed eigenvectors
+//Usage: <exe> <config> <gparity dirs> <options>  (run with fewer than two arguments to print the option list)
+//Returns 0 on success and 1 on a usage error or if the compression test fails.
+//Note:  because we rely upon physical properties we must use a "real" gauge configuration
+int main (int argc, char ** argv) {
+  Grid_init(&argc,&argv);
+  GridLogIRL.TimingMode(1);
+
+  std::vector<int> blockSize = {2,2,2,2,2};
+  std::vector<int> GparityDirs = {1,1,1}; //1 for each GP direction
+
+  int Ls = 12;
+  RealD mass = 0.01;
+  RealD M5 = 1.8;
+  bool is_cps_cfg = false;
+
+  CPSLanczosParams fine;
+
+  //Default parameters for the fine Lanczos; can be replaced wholesale with --read_irl_fine
+  fine.alpha = 2;
+  fine.beta = 0.1;
+  fine.ch_ord = 100;
+  fine.N_use = 70;
+  fine.N_get = 60;
+  fine.N_true_get = 60;
+  fine.stop_rsd = 1e-8;
+  fine.maxits = 10000;
+
+  double coarse_relax_tol = 1e5;
+ 
+  if(argc < 3){
+    std::cout << GridLogMessage << "Usage: <exe> <config> <gparity dirs> <options>" << std::endl;
+    std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
+    std::cout << GridLogMessage << "Options:" << std::endl;
+    std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
+    std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
+    std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents  (default 2.2.2.2.2)" << std::endl;
+    std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
+    std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
+    std::cout << GridLogMessage << "--read_irl_fine <filename>: Read the parameters file for the fine Lanczos" << std::endl;
+    std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;
+    std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
+    Grid_finalize();
+    return 1;
+  }
+  std::string config = argv[1];
+  GridCmdOptionIntVector(argv[2], GparityDirs);
+  assert(GparityDirs.size() == 3);
+
+  bool write_fine = false;
+  std::string write_fine_file;
+
+  bool read_fine = false;
+  std::string read_fine_file;
+
+  for(int i=3;i<argc;i++){
+    std::string sarg = argv[i];
+
+    //These options consume the following argument. Check that it exists before touching argv[i+1]:
+    //argv[argc] is a null pointer and must not be handed to std::stoi / std::string / std::istringstream
+    const bool takes_value =
+      sarg == "--Ls" || sarg == "--mass" || sarg == "--block" || sarg == "--read_irl_fine" ||
+      sarg == "--write_fine" || sarg == "--read_fine" || sarg == "--coarse_relax_tol";
+    if(takes_value && i+1 >= argc){
+      std::cout << GridLogMessage << "Error: option " << sarg << " requires a value" << std::endl;
+      Grid_finalize();
+      return 1;
+    }
+
+    if(sarg == "--Ls"){
+      Ls = std::stoi(argv[i+1]);
+      std::cout << GridLogMessage << "Set Ls to " << Ls << std::endl;
+    }else if(sarg == "--mass"){
+      std::istringstream ss(argv[i+1]); ss >> mass;
+      std::cout << GridLogMessage << "Set quark mass to " << mass << std::endl;
+    }else if(sarg == "--block"){
+      GridCmdOptionIntVector(argv[i+1], blockSize);
+      assert(blockSize.size() == 5);
+      std::cout << GridLogMessage << "Set block size to ";
+      for(int q=0;q<5;q++) std::cout << blockSize[q] << " ";
+      std::cout << std::endl;      
+    }else if(sarg == "--is_cps_cfg"){
+      is_cps_cfg = true;
+    }else if(sarg == "--write_irl_templ"){
+      //Dump the current (default) Lanczos parameters as a template and exit without running anything
+      XmlWriter writer("irl_templ.xml");
+      write(writer,"Params",fine);
+      Grid_finalize();
+      return 0;
+    }else if(sarg == "--read_irl_fine"){
+      std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
+      XmlReader reader(argv[i+1]);
+      read(reader, "Params", fine);
+    }else if(sarg == "--write_fine"){
+      write_fine = true;
+      write_fine_file = argv[i+1];
+    }else if(sarg == "--read_fine"){
+      read_fine = true;
+      read_fine_file = argv[i+1];
+    }else if(sarg == "--coarse_relax_tol"){
+      std::istringstream ss(argv[i+1]); ss >> coarse_relax_tol;
+      std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << coarse_relax_tol << std::endl;
+    }
+
+    //Step over the value just consumed so that it is not re-interpreted as an option name
+    if(takes_value) i++;
+  }
+  
+  //Fine grids
+  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),  GridDefaultSimd(Nd,vComplex::Nsimd()),   GridDefaultMpi());
+  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+  //Setup G-parity BCs
+  assert(Nd == 4);
+  std::vector<int> dirs4(4);
+  for(int i=0;i<3;i++) dirs4[i] = GparityDirs[i];
+  dirs4[3] = 0; //periodic gauge BC in time
+  
+  std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
+  ConjugateGimplD::setDirections(dirs4); //gauge BC
+
+  GparityWilsonImplD::ImplParams Params;
+  for(int i=0;i<Nd-1;i++) Params.twists[i] = GparityDirs[i]; //G-parity directions
+  Params.twists[Nd-1] = 1; //APBC in time direction
+  std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
+  
+  //Read the gauge field
+  LatticeGaugeField Umu(UGrid);  
+  readConfiguration(Umu, config, is_cps_cfg);
+
+  //Setup the coarse grids: each fine extent must be an exact multiple of the corresponding block extent
+  auto fineLatt     = GridDefaultLatt();
+  Coordinate coarseLatt(4);
+  for (int d=0;d<4;d++){
+    coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
+  }
+
+  std::cout << GridLogMessage<< " 5d coarse lattice is ";
+  for (int i=0;i<4;i++){
+    std::cout << coarseLatt[i]<<"x";
+  } 
+  int cLs = Ls/blockSize[4]; assert(cLs*blockSize[4]==Ls);
+  std::cout << cLs<<std::endl;
+  
+  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
+  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
+  const int nbasis= 60;
+  typedef vTComplex CComplex; 
+  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
+  typedef Lattice<CComplex>                   CoarseScalar;
+  typedef Lattice<CoarseSiteVector>           CoarseField;
+ 
+  //Dirac operator
+  GparityDomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params);
+  typedef GparityDomainWallFermionD::FermionField FermionField;
+  
+  SchurDiagTwoOperator<GparityDomainWallFermionD,FermionField> SchurOp(action);
+
+  typedef GparityWilsonImplD::SiteSpinor SiteSpinor;
+
+  //Do the fine Lanczos
+  std::vector<RealD> evals;
+  std::vector<FermionField> evecs;
+
+  if(read_fine){
+    //Load previously computed eigenvalues (XML) and eigenvectors (SciDAC) instead of running the Lanczos
+    evals.resize(fine.N_true_get);
+    evecs.resize(fine.N_true_get, FrbGrid);
+
+    std::string evals_file = read_fine_file + "_evals.xml";
+    std::string evecs_file = read_fine_file + "_evecs.scidac";
+    
+    std::cout << GridLogIRL<< "Reading evals from "<<evals_file<<std::endl;
+    XmlReader RDx(evals_file);
+    read(RDx,"evals",evals);
+    
+    assert(evals.size()==fine.N_true_get);
+    
+    std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
+    emptyUserRecord record;
+    Grid::ScidacReader RD ;
+    RD.open(evecs_file);
+    for(int k=0;k<fine.N_true_get;k++) {
+      evecs[k].Checkerboard()=Odd;
+      RD.readScidacFieldRecord(evecs[k],record);
+      
+    }
+    RD.close();
+  }else{ 
+    int Nstop = fine.Nstop();
+    int Nm = fine.Nm();
+    int Nk = fine.Nk();
+    RealD resid = fine.stop_rsd;
+    int MaxIt = fine.maxits;
+    
+    assert(nbasis<=Nm);    
+    Chebyshev<FermionField>      Cheby(fine.getChebyParams());
+    FunctionHermOp<FermionField> ChebyOp(Cheby,SchurOp);
+    PlainHermOp<FermionField>    Op(SchurOp);
+
+    evals.resize(Nm);
+    evecs.resize(Nm,FrbGrid);
+    
+    ImplicitlyRestartedLanczos<FermionField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,0,0);
+
+    //Constant source on the odd checkerboard
+    FermionField src(FrbGrid); 
+    typedef typename FermionField::scalar_type Scalar;
+    src=Scalar(1.0); 
+    src.Checkerboard() = Odd;
+
+    int Nconv;
+    IRL.calc(evals, evecs,src,Nconv,false);
+
+    if(write_fine){
+      std::string evals_file = write_fine_file + "_evals.xml";
+      std::string evecs_file = write_fine_file + "_evecs.scidac";
+
+      std::cout << GridLogIRL<< "Writing evecs to "<<evecs_file<<std::endl;
+
+      emptyUserRecord record;
+      Grid::ScidacWriter WR(FrbGrid->IsBoss());
+      WR.open(evecs_file);
+      for(int k=0;k<evecs.size();k++) {
+        WR.writeScidacFieldRecord(evecs[k],record);
+      }
+      WR.close();
+
+      std::cout << GridLogIRL<< "Writing evals to "<<evals_file<<std::endl;
+      
+      XmlWriter WRx(evals_file);
+      write(WRx,"evals",evals);
+    }    
+  }
+    
+  //Do the compression
+  LocalCoherenceCompressor<SiteSpinor,vTComplex,nbasis> compressor;
+  std::vector<FermionField> basis(nbasis,FrbGrid);
+  std::vector<CoarseField> compressed_evecs(evecs.size(),CoarseGrid5);
+  
+  compressor.compress(basis, compressed_evecs, evecs, FrbGrid, CoarseGrid5);
+
+  compareBlockPromoteTimings(basis, compressed_evecs);
+
+  //Test the result.
+  //testCompression returns its 'fail' flag, which is set when a residual exceeds its tolerance, so 'true' means failure.
+  //The call is made outside of assert() so that the test still runs when compiled with NDEBUG.
+  const bool fail = compressor.testCompression(SchurOp, basis, compressed_evecs, evals, fine.stop_rsd, coarse_relax_tol);
+  if(fail){
+    std::cout << GridLogMessage << "Error: compression test failed, one or more residuals exceeded tolerance" << std::endl;
+  }
+
+  Grid_finalize();
+  return fail ? 1 : 0;
+}