1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Improvements

This commit is contained in:
Peter Boyle 2024-04-01 14:18:40 -04:00
parent da890dc293
commit 5b79d51c22
2 changed files with 65 additions and 328 deletions

View File

@ -131,6 +131,7 @@ public:
void OpDirAll (const Field &in, std::vector<Field> &out) { assert(0); };
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
};
/*
template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
@ -151,7 +152,7 @@ public:
Cheby(_SmootherOperator,tmp,out);
}
};
*/
template<class Field> class CGSmoother : public LinearFunction<Field>
{
public:
@ -179,8 +180,6 @@ int main (int argc, char ** argv)
const int Ls=24;
const int nbasis = 62;
// const int nbasis = 56;
// const int nbasis = 44;
const int cb = 0 ;
RealD mass=0.00078;
RealD M5=1.8;
@ -357,31 +356,17 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
CoarseVector c_res(Coarse5d);
CoarseVector c_ref(Coarse5d);
// Try projecting to one hop only
// LittleDiracOp.ShiftMatrix(1.0e-4);
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
HermMatrix CoarseOp (LittleDiracOp);
HermMatrix CoarseOpProj (LittleDiracOpProj);
//////////////////////////////////////////
// Build a coarse lanczos
//////////////////////////////////////////
// Chebyshev<CoarseVector> IRLCheby(0.012,40.0,201); //500 HDCG iters
// int Nk=512; // Didn't save much
// int Nm=640;
// int Nstop=400;
// Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
// int Nk=128;
// int Nm=160;
Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
HermMatrix CoarseOp (LittleDiracOp);
int Nk=192;
int Nm=256;
int Nstop=Nk;
Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201);
// Chebyshev<CoarseVector> IRLCheby(0.010,45.0,201); // 1 iter
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp);
PlainHermOp<CoarseVector> IRLOp (CoarseOp);
@ -395,208 +380,40 @@ slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 it
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
IRL.calc(eval,evec,c_src,Nconv);
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
//////////////////////////////////////////
// Build a coarse space solver
// Deflated guesser
//////////////////////////////////////////
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
int maxit=30000;
ConjugateGradient<CoarseVector> CG(1.0e-10,maxit,false);
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
c_res=Zero();
// HPDSolve(c_src,c_res); c_ref = c_res;
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
// std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl;
//////////////////////////////////////////////////////////////////////////
// Deflated (with real op EV's) solve for the projected coarse op
// Work towards ADEF1 in the coarse space
//////////////////////////////////////////////////////////////////////////
HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
c_res=Zero();
// HPDSolveProj(c_src,c_res);
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
// std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl;
// c_res = c_res - c_ref;
// std::cout << "Projected solver error "<<norm2(c_res)<<std::endl;
//////////////////////////////////////////////////////////////////////
// Coarse ADEF1 with deflation space
//////////////////////////////////////////////////////////////////////
ChebyshevSmoother<CoarseVector > CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
// CoarseSmoother(0.1,37.,8,CoarseOpProj); //
// CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s
// CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s
// CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter
// CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter
// ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj); // 311
////////////////////////////////////////////////////////
// CG, Cheby mode spacing 200,200
// Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
// Unprojected Coarse CG solve to 4e-2 : 33 iters, 0.8s
// Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
////////////////////////////////////////////////////////
// CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs
////////////////////////////////////////////////////////
// ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s 2.1x gain
// ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
// HDCG 38 iters 162s
//
// CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs
// ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s 2.1x gain
// ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
// HDCG 38 iters 169s
TwoLevelADEF1defl<CoarseVector>
cADEF1(1.0e-8, 500,
CoarseOp,
CoarseSmoother,
evec,eval);
// c_res=Zero();
// cADEF1(c_src,c_res);
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
// std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
// c_res = c_res - c_ref;
// std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
// cADEF1.Tolerance = 4.0e-2;
// cADEF1.Tolerance = 1.0e-1;
// cADEF1.Tolerance = 5.0e-2;
// c_res=Zero();
// cADEF1(c_src,c_res);
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
// std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
// c_res = c_res - c_ref;
// std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
//////////////////////////////////////////
// Build a smoother
// HDCG
//////////////////////////////////////////
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp); //383
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp); //328
// std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1
// std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations
// std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters
// std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters
//
// Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40
// Need to measure cost of coarse space.
//
// -- i) Reduce coarse residual -- 0.04
// -- ii) Lanczos on coarse space -- done
// -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and
// use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec
//
//
//
//
std::vector<RealD> los({2.0,2.5}); // Nbasis 40 == 36,36 iters
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
// std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
std::vector<int> ords({9}); // Nbasis 40 == 40 iters (320 mults)
/*
Smoother opt @56 nbasis, 0.04 convergence, 192 evs
ord lo
16 0.1 no converge -- likely sign indefinite
32 0.1 no converge -- likely sign indefinite(?)
16 0.5 422
32 0.5 302
8 1.0 575
12 1.0 449
16 1.0 375
32 1.0 302
12 3.0 476
16 3.0 319
32 3.0 306
Powerlaw setup 62 vecs
slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
Cheby setup: 56vecs
-- CG smoother O(16): 487
Power law setup, 56 vecs -- lambda^-5
slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32
Power law setup, 56 vecs -- lambda^-3
slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s 1.0 32
slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s 1.0 16
slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s 0.5 32
Power law setup, 56 vecs -- lambda^-4
-- CG smoother O(16): 290
-- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
Grid : Message : 2790.797194 s : HDCG: Pcg converged in 190 iterations and 1049.563182 s 1.0 32
Grid : Message : 3766.374396 s : HDCG: Pcg converged in 218 iterations and 975.455668 s 1.0 16
Grid : Message : 4888.746190 s : HDCG: Pcg converged in 191 iterations and 1122.252055 s 0.5 32
Grid : Message : 5956.679661 s : HDCG: Pcg converged in 231 iterations and 1067.812850 s 0.5 16
Grid : Message : 2767.405829 s : HDCG: Pcg converged in 218 iterations and 967.214067 s -- 16
Grid : Message : 3816.165905 s : HDCG: Pcg converged in 251 iterations and 1048.636269 s -- 12
Grid : Message : 5121.206572 s : HDCG: Pcg converged in 318 iterations and 1304.916168 s -- 8
[paboyle@login2.crusher debug]$ grep -v Memory slurm-402426.out | grep converged | grep HDCG -- [1.0,16] cheby
Grid : Message : 5185.521063 s : HDCG: Pcg converged in 377 iterations and 1595.843529 s
[paboyle@login2.crusher debug]$ grep HDCG slurm-402184.out | grep onver
Grid : Message : 3760.438160 s : HDCG: Pcg converged in 422 iterations and 2129.243141 s
Grid : Message : 5660.588015 s : HDCG: Pcg converged in 308 iterations and 1900.026821 s
Grid : Message : 4238.206528 s : HDCG: Pcg converged in 575 iterations and 2657.430676 s
Grid : Message : 6345.880344 s : HDCG: Pcg converged in 449 iterations and 2108.505208 s
grep onverg slurm-401663.out | grep HDCG
Grid : Message : 3900.817781 s : HDCG: Pcg converged in 476 iterations and 1992.591311 s
Grid : Message : 5647.202699 s : HDCG: Pcg converged in 306 iterations and 1746.838660 s
[paboyle@login2.crusher debug]$ grep converged slurm-401775.out | grep HDCG
Grid : Message : 3583.177025 s : HDCG: Pcg converged in 375 iterations and 1800.896037 s
Grid : Message : 5348.342243 s : HDCG: Pcg converged in 302 iterations and 1765.045018 s
Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
*/
//
for(int l=0;l<los.size();l++){
RealD lo = los[l];
for(int o=0;o<ords.size();o++){
//////////////////////////////////////////
// Sloppy coarse solve
//////////////////////////////////////////
ConjugateGradient<CoarseVector> CGsloppy(4.0e-2,maxit,false);
HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp); // 311
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
/*
* CG smooth 11 iter:
slurm-403825.out:Grid : Message : 4369.824339 s : HDCG: fPcg converged in 215 iterations 3.0
slurm-403908.out:Grid : Message : 3955.897470 s : HDCG: fPcg converged in 236 iterations 1.0
slurm-404273.out:Grid : Message : 3843.792191 s : HDCG: fPcg converged in 210 iterations 2.0
* CG smooth 9 iter:
*/
//
//////////////////////////////////////////
// IRS shifted smoother based on CG
//////////////////////////////////////////
RealD MirsShift = lo;
ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
@ -607,25 +424,11 @@ Conclusion: higher order smoother is doing better. Much better. Use a Krylov smo
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCG(1.0e-8, 700,
FineHermOp,
// ChebySmooth,
CGsmooth,
HPDSolveSloppy,
HPDSolve,
Aggregates);
/*
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCGdefl(1.0e-8, 700,
FineHermOp,
Smoother,
cADEF1,
HPDSolve,
Aggregates);
*/
// result=Zero();
// HDCGdefl(src,result);
result=Zero();
HDCG(src,result);

View File

@ -27,6 +27,8 @@ Author: Peter Boyle <pboyle@bnl.gov>
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/AdefMrhs.h>
using namespace std;
using namespace Grid;
@ -146,10 +148,6 @@ void LoadEigenvectors(std::vector<RealD> &eval,
#endif
}
// Returns the reciprocal 1/x of its argument.
// There is no guard against x == 0; the division is performed as written.
RealD InverseApproximation(RealD x)
{
  const auto reciprocal = 1.0/x;
  return reciprocal;
}
// Want Op in CoarsenOp to call MatPcDagMatPc
template<class Field>
class HermOpAdaptor : public LinearOperatorBase<Field>
@ -165,26 +163,6 @@ public:
void OpDirAll (const Field &in, std::vector<Field> &out) { assert(0); };
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
};
// LinearFunction that applies a Chebyshev object to a stored operator.
// The Chebyshev object is constructed once from (_lo, _hi, _ord) together with
// InverseApproximation; each call hands it the stored operator and the input.
template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  using FineOperator = LinearOperatorBase<Field>;

  FineOperator & _SmootherOperator; // operator passed to Cheby on every application
  Chebyshev<Field> Cheby;           // built in the constructor, reused by operator()

  // _lo, _hi, _ord   : forwarded unchanged to the Chebyshev constructor
  // SmootherOperator : held by reference; the caller must keep it alive
  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator)
    : _SmootherOperator(SmootherOperator),
      Cheby(_lo,_hi,_ord,InverseApproximation)
  {
    // Log the configuration once at construction time.
    std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl;
  }

  // Copies `in` into a scratch field on the same grid, then applies Cheby with
  // the stored operator, writing the result into `out`.
  void operator() (const Field &in, Field &out)
  {
    Field scratch(in.Grid());
    scratch = in;
    Cheby(_SmootherOperator,scratch,out);
  }
};
template<class Field> class CGSmoother : public LinearFunction<Field>
{
@ -214,9 +192,6 @@ int main (int argc, char ** argv)
const int Ls=24;
const int nbasis = 62;
// const int nbasis = 56;
// const int nbasis = 44;
// const int nbasis = 36;
const int cb = 0 ;
RealD mass=0.00078;
RealD M5=1.8;
@ -253,12 +228,10 @@ int main (int argc, char ** argv)
///////////////////////// Configuration /////////////////////////////////
LatticeGaugeField Umu(UGrid);
MemoryManager::Print();
FieldMetaData header;
std::string file("ckpoint_lat.1000");
NerscIO::readConfiguration(Umu,header,file);
MemoryManager::Print();
//////////////////////// Fermion action //////////////////////////////////
MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
@ -288,16 +261,15 @@ int main (int argc, char ** argv)
////////////////////////////////////////////////////////////
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.rat.18node.62");
std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.rat.18node.62");
std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.rat.18node.62");
std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.new.62");
std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.new.62");
std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.new.62");
std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/evecs.scidac");
std::string eval_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/eval.xml");
bool load_agg=true;
bool load_refine=true;
bool load_mat=true;
bool load_agg=false;
bool load_refine=false;
bool load_mat=false;
bool load_evec=false;
MemoryManager::Print();
int refine=1;
if ( load_agg ) {
@ -305,10 +277,11 @@ int main (int argc, char ** argv)
LoadBasis(Aggregates,subspace_file);
}
} else {
Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
0.0003,1.0e-5,2000); // Lo, tol, maxit
// Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
// 0.0003,1.0e-5,2000); // Lo, tol, maxit
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run
// Aggregates.CreateSubspaceChebyshevNew(RNG5,HermOpEO,95.); // 176 with refinement
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.001,3000,1500,200,0.0); // Attempt to resurrect
SaveBasis(Aggregates,subspace_file);
}
@ -317,7 +290,9 @@ int main (int argc, char ** argv)
LoadBasis(Aggregates,refine_file);
} else {
// HDCG used Pcg to refine
Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000);
//Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000); // 172 iters
//Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,1500); // 202 iters
Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,2000); // 202 iters
SaveBasis(Aggregates,refine_file);
}
}
@ -327,7 +302,7 @@ int main (int argc, char ** argv)
LoadOperator(LittleDiracOp,ldop_file);
} else {
LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
// SaveOperator(LittleDiracOp,ldop_file);
SaveOperator(LittleDiracOp,ldop_file);
}
// I/O test:
@ -382,13 +357,13 @@ int main (int argc, char ** argv)
// MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
typedef MultiGeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> MultiGeneralCoarsenedMatrix_t;
MultiGeneralCoarsenedMatrix_t mrhs(geom,CoarseMrhs);
// mrhs.CopyMatrix(LittleDiracOp);
mrhs.CopyMatrix(LittleDiracOp);
// mrhs.SetMatrix(LittleDiracOp.);
mrhs.CoarsenOperator(FineHermOp,Aggregates,Coarse5d);
// mrhs.CoarsenOperator(FineHermOp,Aggregates,Coarse5d);
// mrhs.CheckMatrix(LittleDiracOp);
//////////////////////////////////////////
// Build a coarse lanczos
// Build a coarse Lanczos -- FIXME: must be able to run this on the mrhs operator
//////////////////////////////////////////
std::cout << "**************************************"<<std::endl;
std::cout << "Building Coarse Lanczos "<<std::endl;
@ -411,7 +386,7 @@ int main (int argc, char ** argv)
std::vector<RealD> eval(Nm);
std::vector<CoarseVector> evec(Nm,Coarse5d);
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
// PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
if ( load_evec ) {
eval.resize(Nstop);
@ -422,17 +397,16 @@ int main (int argc, char ** argv)
assert(Nstop==eval.size());
SaveEigenvectors(eval,evec,evec_file,eval_file);
}
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
MultiRHSDeflation<CoarseVector> MrhsGuesser;
MrhsGuesser.ImportEigenBasis(evec,eval);
//////////////////////////////////////////
// Build a coarse space solver
//////////////////////////////////////////
int maxit=30000;
ConjugateGradient<CoarseVector> CG(1.0e-10,maxit,false);
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
ConjugateGradient<CoarseVector> CG(5.0e-2,maxit,false);
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
@ -442,7 +416,7 @@ int main (int argc, char ** argv)
typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
MrhsHermMatrix MrhsCoarseOp (mrhs);
#if 1
#if 0
{
CoarseVector rh_res(CoarseMrhs);
CoarseVector rh_guess(CoarseMrhs);
@ -454,7 +428,6 @@ int main (int argc, char ** argv)
std::cout << "*************************"<<std::endl;
std::cout << " MrhsGuesser importing"<<std::endl;
std::cout << "*************************"<<std::endl;
MrhsGuesser.ImportEigenBasis(evec,eval);
std::vector<CoarseVector> BlasGuess(nrhs,Coarse5d);
std::vector<CoarseVector> BlasSource(nrhs,Coarse5d);
for(int r=0;r<nrhs;r++){
@ -503,104 +476,64 @@ int main (int argc, char ** argv)
//////////////////////////////////////
// fine solve
//////////////////////////////////////
std::vector<RealD> los({2.0});
std::vector<int> ords({7});
/*
Powerlaw setup 62 vecs
slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
Cheby setup: 56vecs
-- CG smoother O(16): 487
Power law setup, 56 vecs -- lambda^-5
slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32
Power law setup, 56 vecs -- lambda^-3
slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s 1.0 32
slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s 1.0 16
slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s 0.5 32
Power law setup, 56 vecs -- lambda^-4
-- CG smoother O(16): 290
-- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
*/
//
MemoryManager::Print();
for(int l=0;l<los.size();l++){
RealD lo = los[l];
for(int o=0;o<ords.size();o++){
ConjugateGradient<CoarseVector> CGsloppy(4.0e-2,maxit,false);
/////////////////////////////////////////////////
// Coarse sloppy solve
/////////////////////////////////////////////////
ConjugateGradient<CoarseVector> CGsloppy(5.0e-2,maxit,false);
HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp); // 311
/////////////////////////////////////////////////
// Mirs smoother
/////////////////////////////////////////////////
RealD MirsShift = lo;
ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
//////////////////////////////////////////
// Build a HDCG solver
//////////////////////////////////////////
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCG(1.0e-8, 700,
FineHermOp,
CGsmooth,
HPDSolveSloppy,
HPDSolve,
Aggregates);
// result=Zero();
// std::cout << "Calling HDCG single RHS"<<std::endl;
// HDCG(src,result);
//////////////////////////////////////////
// Build a HDCG mrhs solver
//////////////////////////////////////////
#if 1
MemoryManager::Print();
MultiRHSBlockProject<LatticeFermionD> MrhsProjector;
MrhsProjector.Allocate(nbasis,FrbGrid,Coarse5d);
MrhsProjector.ImportBasis(Aggregates.subspace);
DoNothingGuesser<CoarseVector> DoNothing;
HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,CG,DoNothing);
HPDSolver<CoarseVector> HPDSolveMrhsSloppy(MrhsCoarseOp,CGsloppy,DoNothing);
TwoLevelADEF2mrhs<LatticeFermion,CoarseVector,Subspace>
TwoLevelADEF2mrhs<LatticeFermion,CoarseVector>
HDCGmrhs(1.0e-8, 500,
FineHermOp,
CGsmooth,
// HPDSolveSloppy, // Never used
// HPDSolve, // Used in Vstart
HPDSolveMrhsSloppy, // Used in M1
HPDSolveMrhs, // Used in Vstart
DeflCoarseGuesser, // single RHS guess used in M1
CoarseMrhs, // Grid needed to Mrhs grid
Aggregates);
MrhsProjector,
MrhsGuesser,
CoarseMrhs);
std::cout << "Calling mRHS HDCG"<<std::endl;
FrbGrid->Barrier();
std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid);
std::cout << " mRHS source"<<std::endl;
std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid);
std::cout << " mRHS result"<<std::endl;
random(RNG5,src_mrhs[0]);
for(int r=0;r<nrhs;r++){
if(r>0)src_mrhs[r]=src_mrhs[0];
for(int r=0;r<nrhs;r++){
random(RNG5,src_mrhs[r]);
// if(r>0)src_mrhs[r]=src_mrhs[0];
res_mrhs[r]=Zero();
std::cout << "Setup mrhs source "<<r<<std::endl;
}
std::cout << "Calling the mRHS HDCG"<<std::endl;
MemoryManager::Print();
HDCGmrhs(src_mrhs,res_mrhs);
MemoryManager::Print();
#endif
}
std::cout << "Calling the mRHS HDCG"<<std::endl;
HDCGmrhs(src_mrhs,res_mrhs);
}
}
@ -610,6 +543,7 @@ Conclusion: higher order smoother is doing better. Much better. Use a Krylov smo
LatticeFermion result(FrbGrid); result=Zero();
LatticeFermion src(FrbGrid); random(RNG5,src);
result=Zero();
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
CGfine(HermOpEO, src, result);
}
#endif