Improvements

2026-02-02 05:13:28 +00:00 · 2024-04-01 14:18:40 -04:00
parent da890dc293
commit 5b79d51c22
2 changed files with 65 additions and 328 deletions
--- a/tests/debug/Test_general_coarse_hdcg_phys.cc
+++ b/tests/debug/Test_general_coarse_hdcg_phys.cc
@@ -131,6 +131,7 @@ public:
  void OpDirAll  (const Field &in, std::vector<Field> &out)  {    assert(0);  };
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
 };
+/*
 template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
 {
 public:
@@ -151,7 +152,7 @@ public:
    Cheby(_SmootherOperator,tmp,out);
  }
 };
-
+*/
 template<class Field> class CGSmoother : public LinearFunction<Field>
 {
 public:
@@ -179,8 +180,6 @@ int main (int argc, char ** argv)

  const int Ls=24;
  const int nbasis = 62;
-  //  const int nbasis = 56;
-  //  const int nbasis = 44;
  const int cb = 0 ;
  RealD mass=0.00078;
  RealD M5=1.8;
@@ -357,31 +356,17 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
  CoarseVector c_res(Coarse5d); 
  CoarseVector c_ref(Coarse5d);

-  // Try projecting to one hop only
-  //  LittleDiracOp.ShiftMatrix(1.0e-4);
-  LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
-  LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
-
-  typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
-  HermMatrix CoarseOp     (LittleDiracOp);
-  HermMatrix CoarseOpProj (LittleDiracOpProj);
-  
  //////////////////////////////////////////
  // Build a coarse lanczos
  //////////////////////////////////////////
-  //  Chebyshev<CoarseVector>      IRLCheby(0.012,40.0,201);  //500 HDCG iters
-  //  int Nk=512; // Didn't save much
-  //  int Nm=640;
-  //  int Nstop=400;
-
-  //  Chebyshev<CoarseVector>      IRLCheby(0.005,40.0,201);  //319 HDCG iters @ 128//160 nk.
-  //  int Nk=128;
-  //  int Nm=160;
-  Chebyshev<CoarseVector>      IRLCheby(0.005,40.0,201);  //319 HDCG iters @ 128//160 nk.
+  typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
+  HermMatrix CoarseOp     (LittleDiracOp);
+  
  int Nk=192;
  int Nm=256;
  int Nstop=Nk;
  
+  Chebyshev<CoarseVector>      IRLCheby(0.005,40.0,201);
  //  Chebyshev<CoarseVector>      IRLCheby(0.010,45.0,201);  // 1 iter
  FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp);
  PlainHermOp<CoarseVector>    IRLOp    (CoarseOp);
@@ -395,208 +380,40 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
  PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src);

  IRL.calc(eval,evec,c_src,Nconv);
-  DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);

  //////////////////////////////////////////
-  // Build a coarse space solver
+  // Deflated guesser
  //////////////////////////////////////////
+  DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
+
  int maxit=30000;
  ConjugateGradient<CoarseVector>  CG(1.0e-10,maxit,false);
  ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false);
-  ZeroGuesser<CoarseVector> CoarseZeroGuesser;

-  //  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
-  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
-  c_res=Zero();
-  //  HPDSolve(c_src,c_res); c_ref = c_res;
-  //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
-  //  std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl;
-  //////////////////////////////////////////////////////////////////////////
-  // Deflated (with real op EV's) solve for the projected coarse op
-  // Work towards ADEF1 in the coarse space
-  //////////////////////////////////////////////////////////////////////////
-  HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
-  c_res=Zero();
-  //  HPDSolveProj(c_src,c_res);
-  //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
-  //  std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl;
-  //  c_res = c_res - c_ref;
-  //  std::cout << "Projected solver error "<<norm2(c_res)<<std::endl;
-
-  //////////////////////////////////////////////////////////////////////
-  // Coarse ADEF1 with deflation space
-  //////////////////////////////////////////////////////////////////////
-  ChebyshevSmoother<CoarseVector >  CoarseSmoother(1.0,37.,8,CoarseOpProj);  // just go to sloppy 0.1 convergence
-    //  CoarseSmoother(0.1,37.,8,CoarseOpProj);  //
-  //  CoarseSmoother(0.5,37.,6,CoarseOpProj);  //  8 iter 0.36s
-  //    CoarseSmoother(0.5,37.,12,CoarseOpProj);  // 8 iter, 0.55s
-  //    CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter
-  //  CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter
-  //  ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj);  // 311
-
-  ////////////////////////////////////////////////////////
-  // CG, Cheby mode spacing 200,200
-  // Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
-  // Unprojected Coarse CG solve to 4e-2 :  33 iters, 0.8s
-  // Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
-  ////////////////////////////////////////////////////////
-  // CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs 
-  ////////////////////////////////////////////////////////
-  // ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s  2.1x gain
-  // ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
-  // HDCG 38 iters 162s
-  //
-  // CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs 
-  // ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s  2.1x gain
-  // ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
-  // HDCG 38 iters 169s
-
-  TwoLevelADEF1defl<CoarseVector>
-    cADEF1(1.0e-8, 500,
-	   CoarseOp,
-	   CoarseSmoother,
-	   evec,eval);
-
-  //  c_res=Zero();
-  //  cADEF1(c_src,c_res);
-  //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
-  //  std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
-  //  c_res = c_res - c_ref;
-  //  std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
-  
-  //  cADEF1.Tolerance = 4.0e-2;
-  //  cADEF1.Tolerance = 1.0e-1;
-  //  cADEF1.Tolerance = 5.0e-2;
-  //  c_res=Zero();
-  //  cADEF1(c_src,c_res);
-  //  std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
-  //  std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
-  //  c_res = c_res - c_ref;
-  //  std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
-  
  //////////////////////////////////////////
-  // Build a smoother
+  // HDCG
  //////////////////////////////////////////
-  //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499
-  //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp);  //383
-  //  ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp);  //328
-  //  std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1
-  //  std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations
-  //  std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters
-  //  std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters
-
-  //
-  // Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40
-  // Need to measure cost of coarse space.
-  //
-  // -- i) Reduce coarse residual   -- 0.04
-  // -- ii) Lanczos on coarse space -- done
-  // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and
-  //         use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec
-  //
-  //
-  //
-  //
  
  std::vector<RealD> los({2.0,2.5}); // Nbasis 40 == 36,36 iters
-
-  //  std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
-  //  std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
  std::vector<int> ords({9}); // Nbasis 40 == 40 iters (320 mults)  

- /*
-   Smoother opt @56 nbasis, 0.04 convergence, 192 evs
- ord lo
-
- 16   0.1  no converge -- likely sign indefinite
- 32   0.1  no converge -- likely sign indefinite(?)
-
- 16   0.5  422
- 32   0.5  302
- 
- 8   1.0  575
- 12  1.0  449
- 16  1.0  375
- 32  1.0  302
-
- 12  3.0  476
- 16  3.0  319
- 32  3.0  306
-
- Powerlaw setup 62 vecs
-slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
-slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
-
- Cheby setup: 56vecs
- -- CG smoother O(16): 487
- 
-Power law setup, 56 vecs -- lambda^-5
-slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32
-
-Power law setup, 56 vecs -- lambda^-3
-
-slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s  1.0 32
-slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s  1.0 16
-slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s  0.5 32
-
- Power law setup, 56 vecs -- lambda^-4
- -- CG smoother    O(16): 290
- -- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
-
-Grid : Message : 2790.797194 s : HDCG: Pcg converged in 190 iterations and 1049.563182 s 1.0 32
-Grid : Message : 3766.374396 s : HDCG: Pcg converged in 218 iterations and 975.455668 s  1.0 16
-Grid : Message : 4888.746190 s : HDCG: Pcg converged in 191 iterations and 1122.252055 s 0.5 32
-Grid : Message : 5956.679661 s : HDCG: Pcg converged in 231 iterations and 1067.812850 s 0.5 16
-
-Grid : Message : 2767.405829 s : HDCG: Pcg converged in 218 iterations and 967.214067 s -- 16
-Grid : Message : 3816.165905 s : HDCG: Pcg converged in 251 iterations and 1048.636269 s -- 12
-Grid : Message : 5121.206572 s : HDCG: Pcg converged in 318 iterations and 1304.916168 s -- 8
-
- 
-[paboyle@login2.crusher debug]$ grep -v Memory slurm-402426.out  | grep converged | grep HDCG -- [1.0,16] cheby
-Grid : Message : 5185.521063 s : HDCG: Pcg converged in 377 iterations and 1595.843529 s
-
-[paboyle@login2.crusher debug]$ grep HDCG  slurm-402184.out | grep onver
-Grid : Message : 3760.438160 s : HDCG: Pcg converged in 422 iterations and 2129.243141 s
-Grid : Message : 5660.588015 s : HDCG: Pcg converged in 308 iterations and 1900.026821 s
-
- 
-Grid : Message : 4238.206528 s : HDCG: Pcg converged in 575 iterations and 2657.430676 s
-Grid : Message : 6345.880344 s : HDCG: Pcg converged in 449 iterations and 2108.505208 s
-
-grep onverg slurm-401663.out | grep HDCG
-Grid : Message : 3900.817781 s : HDCG: Pcg converged in 476 iterations and 1992.591311 s
-Grid : Message : 5647.202699 s : HDCG: Pcg converged in 306 iterations and 1746.838660 s
-
-
-[paboyle@login2.crusher debug]$ grep converged slurm-401775.out | grep HDCG
-Grid : Message : 3583.177025 s : HDCG: Pcg converged in 375 iterations and 1800.896037 s
-Grid : Message : 5348.342243 s : HDCG: Pcg converged in 302 iterations and 1765.045018 s
-
-Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
-
- */
-				      //
  for(int l=0;l<los.size();l++){

    RealD lo = los[l];

    for(int o=0;o<ords.size();o++){

+      //////////////////////////////////////////
+      // Sloppy coarse solve
+      //////////////////////////////////////////
+      
      ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false);
      HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
-      
-      //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
-      ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp);  // 311
+      HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);

-      /*
-       * CG smooth 11 iter: 
-       slurm-403825.out:Grid : Message : 4369.824339 s : HDCG: fPcg converged in 215 iterations 3.0
-       slurm-403908.out:Grid : Message : 3955.897470 s : HDCG: fPcg converged in 236 iterations 1.0
-       slurm-404273.out:Grid : Message : 3843.792191 s : HDCG: fPcg converged in 210 iterations 2.0
-       * CG smooth 9 iter: 
-      */
-      //
+      //////////////////////////////////////////
+      // Mirs shifted smoother based on CG
+      //////////////////////////////////////////
      RealD MirsShift = lo;
      ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
      CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
@@ -607,25 +424,11 @@ Conclusion: higher order smoother is doing better. Much better. Use a Krylov smo
      TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
 	HDCG(1.0e-8, 700,
 	     FineHermOp,
-	     //	     ChebySmooth,
 	     CGsmooth,
 	     HPDSolveSloppy,
 	     HPDSolve,
 	     Aggregates);

-      /*
-	TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
-	HDCGdefl(1.0e-8, 700,
-		 FineHermOp,
-		 Smoother,
-		 cADEF1,
-		 HPDSolve,
-		 Aggregates);
-      */
-      
-      //      result=Zero();
-      //      HDCGdefl(src,result);
-
      result=Zero();
      HDCG(src,result);
      
--- a/tests/debug/Test_general_coarse_hdcg_phys48.cc
+++ b/tests/debug/Test_general_coarse_hdcg_phys48.cc
@@ -27,6 +27,8 @@ Author: Peter Boyle <pboyle@bnl.gov>
    /*  END LEGAL */
 #include <Grid/Grid.h>

+#include <Grid/algorithms/iterative/AdefMrhs.h>
+
 using namespace std;
 using namespace Grid;

@@ -146,10 +148,6 @@ void LoadEigenvectors(std::vector<RealD>            &eval,
 #endif
 }

-RealD InverseApproximation(RealD x){
-  return 1.0/x;
-}
-
 // Want Op in CoarsenOp to call MatPcDagMatPc
 template<class Field>
 class HermOpAdaptor : public LinearOperatorBase<Field>
@@ -165,26 +163,6 @@ public:
  void OpDirAll  (const Field &in, std::vector<Field> &out)  {    assert(0);  };
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){    assert(0);  }
 };
-template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
-{
-public:
-  using LinearFunction<Field>::operator();
-  typedef LinearOperatorBase<Field> FineOperator;
-  FineOperator   & _SmootherOperator;
-  Chebyshev<Field> Cheby;
-  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) :
-    _SmootherOperator(SmootherOperator),
-    Cheby(_lo,_hi,_ord,InverseApproximation)
-  {
-    std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl;
-  };
-  void operator() (const Field &in, Field &out) 
-  {
-    Field tmp(in.Grid());
-    tmp = in;
-    Cheby(_SmootherOperator,tmp,out);
-  }
-};

 template<class Field> class CGSmoother : public LinearFunction<Field>
 {
@@ -214,9 +192,6 @@ int main (int argc, char ** argv)

  const int Ls=24;
  const int nbasis = 62;
-  //  const int nbasis = 56;
-  //  const int nbasis = 44;
-  //  const int nbasis = 36;
  const int cb = 0 ;
  RealD mass=0.00078;
  RealD M5=1.8;
@@ -253,12 +228,10 @@ int main (int argc, char ** argv)

  ///////////////////////// Configuration /////////////////////////////////
  LatticeGaugeField Umu(UGrid);
-  MemoryManager::Print();

  FieldMetaData header;
  std::string file("ckpoint_lat.1000");
  NerscIO::readConfiguration(Umu,header,file);
-  MemoryManager::Print();

  //////////////////////// Fermion action //////////////////////////////////
  MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
@@ -288,16 +261,15 @@ int main (int argc, char ** argv)
  ////////////////////////////////////////////////////////////
  LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);

-  std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.rat.18node.62");
-  std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.rat.18node.62");
-  std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.rat.18node.62");
+  std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.new.62");
+  std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.new.62");
+  std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.new.62");
  std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/evecs.scidac");
  std::string eval_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/eval.xml");
-  bool load_agg=true;
-  bool load_refine=true;
-  bool load_mat=true;
+  bool load_agg=false;
+  bool load_refine=false;
+  bool load_mat=false;
  bool load_evec=false;
-  MemoryManager::Print();

  int refine=1;
  if ( load_agg ) {
@@ -305,10 +277,11 @@ int main (int argc, char ** argv)
      LoadBasis(Aggregates,subspace_file);
    }
  } else {
-    Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
-					0.0003,1.0e-5,2000); // Lo, tol, maxit
-
+    //    Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
+    //					0.0003,1.0e-5,2000); // Lo, tol, maxit
    //    Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run
+    //    Aggregates.CreateSubspaceChebyshevNew(RNG5,HermOpEO,95.); // 176 with refinement
+    Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.001,3000,1500,200,0.0); // Attempt to resurrect
    SaveBasis(Aggregates,subspace_file);
  }

@@ -317,7 +290,9 @@ int main (int argc, char ** argv)
      LoadBasis(Aggregates,refine_file);
    } else {
      // HDCG used Pcg to refine
-      Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000);
+      //Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000); // 172 iters
+      //Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,1500); // 202 iters
+      Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,2000);   // 202 iters
      SaveBasis(Aggregates,refine_file);
    }
  }
@@ -327,7 +302,7 @@ int main (int argc, char ** argv)
    LoadOperator(LittleDiracOp,ldop_file);
  } else {
    LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
-    //    SaveOperator(LittleDiracOp,ldop_file);
+    SaveOperator(LittleDiracOp,ldop_file);
  }
  
  // I/O test:
@@ -382,13 +357,13 @@ int main (int argc, char ** argv)
  //  MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
  typedef MultiGeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> MultiGeneralCoarsenedMatrix_t;
  MultiGeneralCoarsenedMatrix_t mrhs(geom,CoarseMrhs);
-  //  mrhs.CopyMatrix(LittleDiracOp);
+  mrhs.CopyMatrix(LittleDiracOp);
  //  mrhs.SetMatrix(LittleDiracOp.);
-  mrhs.CoarsenOperator(FineHermOp,Aggregates,Coarse5d);
+  //  mrhs.CoarsenOperator(FineHermOp,Aggregates,Coarse5d);
  //  mrhs.CheckMatrix(LittleDiracOp);
  
  //////////////////////////////////////////
-  // Build a coarse lanczos
+  // Build a coarse lanczos -- FIXME -- Must be able to run this on the mrhs operator
  //////////////////////////////////////////
  std::cout << "**************************************"<<std::endl;
  std::cout << "Building Coarse Lanczos               "<<std::endl;
@@ -411,7 +386,7 @@ int main (int argc, char ** argv)
  std::vector<RealD>            eval(Nm);
  std::vector<CoarseVector>     evec(Nm,Coarse5d);

-  PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src);
+  //  PowerMethod<CoarseVector>       cPM;   cPM(CoarseOp,c_src);

  if ( load_evec ) {
    eval.resize(Nstop);
@@ -422,17 +397,16 @@ int main (int argc, char ** argv)
    assert(Nstop==eval.size());
    SaveEigenvectors(eval,evec,evec_file,eval_file);
  }
-
  DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);

  MultiRHSDeflation<CoarseVector> MrhsGuesser;
+  MrhsGuesser.ImportEigenBasis(evec,eval);
  
  //////////////////////////////////////////
  // Build a coarse space solver
  //////////////////////////////////////////
  int maxit=30000;
-  ConjugateGradient<CoarseVector>  CG(1.0e-10,maxit,false);
-  ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false);
+  ConjugateGradient<CoarseVector>  CG(5.0e-2,maxit,false);
  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
  
  HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
@@ -442,7 +416,7 @@ int main (int argc, char ** argv)
  typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
  MrhsHermMatrix MrhsCoarseOp     (mrhs);

-#if 1
+#if 0
  { 
    CoarseVector rh_res(CoarseMrhs);
    CoarseVector rh_guess(CoarseMrhs);
@@ -454,7 +428,6 @@ int main (int argc, char ** argv)
    std::cout << "*************************"<<std::endl;
    std::cout << " MrhsGuesser importing"<<std::endl;
    std::cout << "*************************"<<std::endl;
-    MrhsGuesser.ImportEigenBasis(evec,eval);
    std::vector<CoarseVector> BlasGuess(nrhs,Coarse5d);
    std::vector<CoarseVector> BlasSource(nrhs,Coarse5d);
    for(int r=0;r<nrhs;r++){
@@ -503,104 +476,64 @@ int main (int argc, char ** argv)
  //////////////////////////////////////
  // fine solve
  //////////////////////////////////////
-  
  std::vector<RealD> los({2.0});
  std::vector<int> ords({7}); 
-
- /*
- Powerlaw setup 62 vecs
-slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
-slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
-
- Cheby setup: 56vecs
- -- CG smoother O(16): 487
- 
-Power law setup, 56 vecs -- lambda^-5
-slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32
-
-Power law setup, 56 vecs -- lambda^-3
-
-slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s  1.0 32
-slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s  1.0 16
-slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s  0.5 32
-
- Power law setup, 56 vecs -- lambda^-4
- -- CG smoother    O(16): 290
- -- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
-
-Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
- */
-				      //
-  MemoryManager::Print();
  for(int l=0;l<los.size();l++){

    RealD lo = los[l];

    for(int o=0;o<ords.size();o++){

-      ConjugateGradient<CoarseVector>  CGsloppy(4.0e-2,maxit,false);
+      /////////////////////////////////////////////////
+      // Coarse sloppy solve
+      /////////////////////////////////////////////////
+      ConjugateGradient<CoarseVector>  CGsloppy(5.0e-2,maxit,false);
      HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
-      
-      //    ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
-      ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp);  // 311

+      /////////////////////////////////////////////////
+      // Mirs smoother
+      /////////////////////////////////////////////////
      RealD MirsShift = lo;
      ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
      CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
  
-      //////////////////////////////////////////
-      // Build a HDCG solver
-      //////////////////////////////////////////
-      TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
-	HDCG(1.0e-8, 700,
-	     FineHermOp,
-	     CGsmooth,
-	     HPDSolveSloppy,
-	     HPDSolve,
-	     Aggregates);
-      //      result=Zero();
-      //      std::cout << "Calling HDCG single RHS"<<std::endl;
-      //      HDCG(src,result);
-
      //////////////////////////////////////////
      // Build a HDCG mrhs solver
      //////////////////////////////////////////
-#if 1
-  MemoryManager::Print();
+      MultiRHSBlockProject<LatticeFermionD> MrhsProjector;
+      MrhsProjector.Allocate(nbasis,FrbGrid,Coarse5d);
+      MrhsProjector.ImportBasis(Aggregates.subspace);
+
      DoNothingGuesser<CoarseVector> DoNothing;
      HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,CG,DoNothing);
      HPDSolver<CoarseVector> HPDSolveMrhsSloppy(MrhsCoarseOp,CGsloppy,DoNothing);
-      TwoLevelADEF2mrhs<LatticeFermion,CoarseVector,Subspace>
+      TwoLevelADEF2mrhs<LatticeFermion,CoarseVector>
 	HDCGmrhs(1.0e-8, 500,
 		 FineHermOp,
 		 CGsmooth,
-		 //		 HPDSolveSloppy, // Never used
-		 //		 HPDSolve,       // Used in Vstart
 		 HPDSolveMrhsSloppy,    // Used in M1
 		 HPDSolveMrhs,          // Used in Vstart
-		 DeflCoarseGuesser, // single RHS guess used in M1
-		 CoarseMrhs,        // Grid needed to Mrhs grid
-		 Aggregates);
+		 MrhsProjector,
+		 MrhsGuesser,
+		 CoarseMrhs);

      std::cout << "Calling mRHS HDCG"<<std::endl;
-      FrbGrid->Barrier();
      
      std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid);
      std::cout << " mRHS source"<<std::endl;
      std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid);
      std::cout << " mRHS result"<<std::endl;

-  random(RNG5,src_mrhs[0]);
-  for(int r=0;r<nrhs;r++){
-	if(r>0)src_mrhs[r]=src_mrhs[0];
+      for(int r=0;r<nrhs;r++){
+	random(RNG5,src_mrhs[r]);
+	//	if(r>0)src_mrhs[r]=src_mrhs[0];
 	res_mrhs[r]=Zero();
 	std::cout << "Setup mrhs source "<<r<<std::endl;
-  }
-  std::cout << "Calling the mRHS HDCG"<<std::endl;
-  MemoryManager::Print();
-  HDCGmrhs(src_mrhs,res_mrhs);
-  MemoryManager::Print();
-#endif
+      }
+
+      std::cout << "Calling the mRHS HDCG"<<std::endl;
+      HDCGmrhs(src_mrhs,res_mrhs);
+
    }
  }

@@ -610,6 +543,7 @@ Conclusion: higher order smoother is doing better. Much better. Use a Krylov smo
    LatticeFermion result(FrbGrid); result=Zero();
    LatticeFermion    src(FrbGrid); random(RNG5,src);
    result=Zero();
+    ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false);
    CGfine(HermOpEO, src, result);
  }
 #endif