1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Improved, works on 48^3 moving to multiRHS optimisations

This commit is contained in:
Peter Boyle 2023-11-15 18:03:05 -05:00
parent f4fa95e7cb
commit 09946cf1ba

View File

@ -67,6 +67,22 @@ void LoadOperator(Coarsened &Operator,std::string file)
Operator.ExchangeCoarseLinks(); Operator.ExchangeCoarseLinks();
#endif #endif
} }
// Re-read the entries of Operator._A from the Scidac file `file`.
// For each of the Operator.geom.npoint entries, a field obtained from
// Operator.Cell.Extract is filled from the next record in the file and the
// result of Operator.Cell.ExchangePeriodic on it is stored back into _A.
// When HAVE_LIME is not defined the body is compiled out and the call does nothing.
template<class Coarsened>
void ReLoadOperator(Coarsened &Operator,std::string file)
{
#ifdef HAVE_LIME
  emptyUserRecord userRecord;
  Grid::ScidacReader reader;
  reader.open(file);

  const auto npoint = Operator.geom.npoint;
  // The operator must already hold one entry per geometry point.
  assert(Operator._A.size()==npoint);

  for(int point=0; point<npoint; point++){
    auto &link = Operator._A[point];
    auto interior = Operator.Cell.Extract(link);
    reader.readScidacFieldRecord(interior,userRecord,0);
    link = Operator.Cell.ExchangePeriodic(interior);
  }

  reader.close();
#endif
}
template<class aggregation> template<class aggregation>
void SaveBasis(aggregation &Agg,std::string file) void SaveBasis(aggregation &Agg,std::string file)
{ {
@ -96,14 +112,6 @@ void LoadBasis(aggregation &Agg, std::string file)
#endif #endif
} }
// LinearFunction that ignores its input and always sets the output to Zero().
template<class Field>
class TestSolver : public LinearFunction<Field>
{
public:
  TestSolver() {}

  // `in` is unused; `out` is overwritten with Zero().
  void operator() (const Field &in, Field &out)
  {
    out = Zero();
  }
};
RealD InverseApproximation(RealD x){ RealD InverseApproximation(RealD x){
return 1.0/x; return 1.0/x;
} }
@ -123,7 +131,7 @@ public:
void OpDirAll (const Field &in, std::vector<Field> &out) { assert(0); }; void OpDirAll (const Field &in, std::vector<Field> &out) { assert(0); };
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
}; };
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field> template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
{ {
public: public:
using LinearFunction<Field>::operator(); using LinearFunction<Field>::operator();
@ -144,12 +152,35 @@ public:
} }
}; };
// Smoother that applies ConjugateGradient with the supplied operator, using
// `iters` as the CG iteration argument and never treating a missed tolerance
// as an error.
//   _iters           : CG iteration count per application (logged as the smoother "order")
//   SmootherOperator : operator handed to CG; stored by reference, so it must
//                      outlive this object
template<class Field> class CGSmoother : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  typedef LinearOperatorBase<Field> FineOperator;
  FineOperator & _SmootherOperator;
  int iters;
  CGSmoother(int _iters, FineOperator &SmootherOperator) :
    _SmootherOperator(SmootherOperator),
    iters(_iters)
  {
    std::cout << GridLogMessage<<" Mirs smoother order "<<iters<<std::endl;
  }
  // Apply the smoother: out = result of the CG run on `in`, started from zero.
  void operator() (const Field &in, Field &out)
  {
    ConjugateGradient<Field> CG(0.0,iters,false); // non-converge is just fine in a smoother
    // Grid's ConjugateGradient uses the incoming solution vector as its
    // starting guess, so clear `out` first; otherwise the result depends on
    // whatever (possibly uninitialised) data the caller left in it.
    out = Zero();
    CG(_SmootherOperator,in,out);
  }
};
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
const int Ls=24; const int Ls=24;
const int nbasis = 40; const int nbasis = 62;
// const int nbasis = 56;
// const int nbasis = 44;
const int cb = 0 ; const int cb = 0 ;
RealD mass=0.00078; RealD mass=0.00078;
RealD M5=1.8; RealD M5=1.8;
@ -164,10 +195,12 @@ int main (int argc, char ** argv)
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
// Construct a coarsened grid with 4^4 cell // Construct a coarsened grid with 4^4 cell
Coordinate Block({4,4,6,4});
Coordinate clatt = GridDefaultLatt(); Coordinate clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){ for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/4; clatt[d] = clatt[d]/Block[d];
} }
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt,
GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());; GridDefaultMpi());;
@ -186,7 +219,7 @@ int main (int argc, char ** argv)
LatticeGaugeField Umu(UGrid); LatticeGaugeField Umu(UGrid);
FieldMetaData header; FieldMetaData header;
std::string file("ckpoint_lat.2250"); std::string file("ckpoint_lat.1000");
NerscIO::readConfiguration(Umu,header,file); NerscIO::readConfiguration(Umu,header,file);
//////////////////////// Fermion action ////////////////////////////////// //////////////////////// Fermion action //////////////////////////////////
@ -204,7 +237,6 @@ int main (int argc, char ** argv)
// Run power method on FineHermOp // Run power method on FineHermOp
PowerMethod<LatticeFermion> PM; PM(HermOpEO,src); PowerMethod<LatticeFermion> PM; PM(HermOpEO,src);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
///////////// Coarse basis and Little Dirac Operator /////// ///////////// Coarse basis and Little Dirac Operator ///////
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -223,21 +255,96 @@ int main (int argc, char ** argv)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d); LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys.nolex.scidac"); std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.rat.scidac.62");
std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys.nolex.scidac"); std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.rat.scidac.62");
std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.rat.scidac.62");
bool load_agg=true; bool load_agg=true;
bool load_refine=true;
bool load_mat=true; bool load_mat=true;
if ( load_agg ) { if ( load_agg ) {
LoadBasis(Aggregates,subspace_file); LoadBasis(Aggregates,subspace_file);
} else { } else {
// NBASIS=40
// Best so far: ord 2000 [0.01,95], 500,500 -- 466 iters
// slurm-398626.out:Grid : Message : 141.295253 s : 500 filt [1] <n|MdagM|n> 0.000103622063
//Grid : Message : 33.870465 s : Chebyshev subspace pass-1 : ord 2000 [0.001,95]
//Grid : Message : 33.870485 s : Chebyshev subspace pass-2 : nbasis40 min 1000 step 1000 lo0
//slurm-1482200.out : filt ~ 0.004 -- not as low mode projecting -- took 626 iters
// To try: 2000 [0.1,95] ,2000,500,500 -- slurm-1482213.out 586 iterations
// To try: 2000 [0.01,95] ,2000,500,500 -- 469 (think I bumped 92 to 95) (??)
// To try: 2000 [0.025,95],2000,500,500
// To try: 2000 [0.005,95],2000,500,500
// NBASIS=44 -- HDCG paper was 64 vectors; AMD compiler craps out at 48
// To try: 2000 [0.01,95] ,2000,500,500 -- 419 lowest slurm-1482355.out
// To try: 2000 [0.025,95] ,2000,500,500 -- 487
// To try: 2000 [0.005,95] ,2000,500,500
/*
Smoother [3,92] order 16
slurm-1482355.out:Grid : Message : 35.239686 s : Chebyshev subspace pass-1 : ord 2000 [0.01,95]
slurm-1482355.out:Grid : Message : 35.239714 s : Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0
slurm-1482355.out:Grid : Message : 5561.305552 s : HDCG: Pcg converged in 419 iterations and 2616.202598 s
slurm-1482367.out:Grid : Message : 43.157235 s : Chebyshev subspace pass-1 : ord 2000 [0.025,95]
slurm-1482367.out:Grid : Message : 43.157257 s : Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0
slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 iterations and 3131.185821 s
*/
/*
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
95.0,0.0075,
2500,
500,
500,
0.0);
*/
/*
Aggregates.CreateSubspaceChebyshevPowerLaw(RNG5,HermOpEO,nbasis,
95.0,
2000);
*/
Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
0.0003,1.0e-5,2000); // Lo, tol, maxit
/*
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis, Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
95.0,0.05, 95.0,0.05,
1000, 2000,
200, 500,
200, 500,
0.0); 0.0);
*/
/*
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
95.0,0.01,
2000,
500,
500,
0.0);
*/
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); -- running slurm-1484934.out nbasis 56
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run
SaveBasis(Aggregates,subspace_file); SaveBasis(Aggregates,subspace_file);
} }
int refine=1;
if(refine){
if ( load_refine ) {
LoadBasis(Aggregates,refine_file);
} else {
// HDCG used Pcg to refine
Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000);
SaveBasis(Aggregates,refine_file);
}
}
Aggregates.Orthogonalise();
if ( load_mat ) { if ( load_mat ) {
LoadOperator(LittleDiracOp,ldop_file); LoadOperator(LittleDiracOp,ldop_file);
} else { } else {
@ -245,7 +352,13 @@ int main (int argc, char ** argv)
SaveOperator(LittleDiracOp,ldop_file); SaveOperator(LittleDiracOp,ldop_file);
} }
// I/O test:
CoarseVector c_src(Coarse5d); random(CRNG,c_src);
CoarseVector c_res(Coarse5d);
CoarseVector c_ref(Coarse5d);
// Try projecting to one hop only // Try projecting to one hop only
// LittleDiracOp.ShiftMatrix(1.0e-4);
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d); LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
@ -256,21 +369,28 @@ int main (int argc, char ** argv)
////////////////////////////////////////// //////////////////////////////////////////
// Build a coarse lanczos // Build a coarse lanczos
////////////////////////////////////////// //////////////////////////////////////////
// Chebyshev<CoarseVector> IRLCheby(0.01,44.0,201); // 1 iter // Chebyshev<CoarseVector> IRLCheby(0.012,40.0,201); //500 HDCG iters
Chebyshev<CoarseVector> IRLCheby(0.005,44.0,401); // 1 iter // int Nk=512; // Didn't save much
// int Nm=640;
// int Nstop=400;
// Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
// int Nk=128;
// int Nm=160;
Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
int Nk=192;
int Nm=256;
int Nstop=Nk;
// Chebyshev<CoarseVector> IRLCheby(0.010,45.0,201); // 1 iter
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp); FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp);
PlainHermOp<CoarseVector> IRLOp (CoarseOp); PlainHermOp<CoarseVector> IRLOp (CoarseOp);
int Nk=160;
int Nm=240; ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1e-5,10);
int Nstop=Nk;
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20);
int Nconv; int Nconv;
std::vector<RealD> eval(Nm); std::vector<RealD> eval(Nm);
std::vector<CoarseVector> evec(Nm,Coarse5d); std::vector<CoarseVector> evec(Nm,Coarse5d);
CoarseVector c_src(Coarse5d); c_src=1.0;
CoarseVector c_res(Coarse5d);
CoarseVector c_ref(Coarse5d);
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src); PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
@ -280,9 +400,9 @@ int main (int argc, char ** argv)
////////////////////////////////////////// //////////////////////////////////////////
// Build a coarse space solver // Build a coarse space solver
////////////////////////////////////////// //////////////////////////////////////////
int maxit=20000; int maxit=30000;
ConjugateGradient<CoarseVector> CG(1.0e-8,maxit,false); ConjugateGradient<CoarseVector> CG(1.0e-10,maxit,false);
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,10000,false); ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
ZeroGuesser<CoarseVector> CoarseZeroGuesser; ZeroGuesser<CoarseVector> CoarseZeroGuesser;
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser); // HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
@ -306,8 +426,7 @@ int main (int argc, char ** argv)
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// Coarse ADEF1 with deflation space // Coarse ADEF1 with deflation space
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
ChebyshevSmoother<CoarseVector,HermMatrix > ChebyshevSmoother<CoarseVector > CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
// CoarseSmoother(0.1,37.,8,CoarseOpProj); // // CoarseSmoother(0.1,37.,8,CoarseOpProj); //
// CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s // CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s
// CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s // CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s
@ -375,41 +494,134 @@ int main (int argc, char ** argv)
// -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and // -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and
// use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec // use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec
// //
std::vector<RealD> los({3.0}); // Nbasis 40 == 36,36 iters //
//
//
std::vector<RealD> los({2.0,2.5}); // Nbasis 40 == 36,36 iters
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults) // std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults) // std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
std::vector<int> ords({9}); // Nbasis 40 == 40 iters (320 mults)
/*
Smoother opt @56 nbasis, 0.04 convergence, 192 evs
ord lo
16 0.1 no converge -- likely sign indefinite
32 0.1 no converge -- likely sign indefinite(?)
16 0.5 422
32 0.5 302
8 1.0 575
12 1.0 449
16 1.0 375
32 1.0 302
12 3.0 476
16 3.0 319
32 3.0 306
Powerlaw setup 62 vecs
slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
Cheby setup: 56vecs
-- CG smoother O(16): 487
Power law setup, 56 vecs -- lambda^-5
slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32
Power law setup, 56 vecs -- lambda^-3
slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s 1.0 32
slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s 1.0 16
slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s 0.5 32
Power law setup, 56 vecs -- lambda^-4
-- CG smoother O(16): 290
-- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
Grid : Message : 2790.797194 s : HDCG: Pcg converged in 190 iterations and 1049.563182 s 1.0 32
Grid : Message : 3766.374396 s : HDCG: Pcg converged in 218 iterations and 975.455668 s 1.0 16
Grid : Message : 4888.746190 s : HDCG: Pcg converged in 191 iterations and 1122.252055 s 0.5 32
Grid : Message : 5956.679661 s : HDCG: Pcg converged in 231 iterations and 1067.812850 s 0.5 16
Grid : Message : 2767.405829 s : HDCG: Pcg converged in 218 iterations and 967.214067 s -- 16
Grid : Message : 3816.165905 s : HDCG: Pcg converged in 251 iterations and 1048.636269 s -- 12
Grid : Message : 5121.206572 s : HDCG: Pcg converged in 318 iterations and 1304.916168 s -- 8
[paboyle@login2.crusher debug]$ grep -v Memory slurm-402426.out | grep converged | grep HDCG -- [1.0,16] cheby
Grid : Message : 5185.521063 s : HDCG: Pcg converged in 377 iterations and 1595.843529 s
[paboyle@login2.crusher debug]$ grep HDCG slurm-402184.out | grep onver
Grid : Message : 3760.438160 s : HDCG: Pcg converged in 422 iterations and 2129.243141 s
Grid : Message : 5660.588015 s : HDCG: Pcg converged in 308 iterations and 1900.026821 s
Grid : Message : 4238.206528 s : HDCG: Pcg converged in 575 iterations and 2657.430676 s
Grid : Message : 6345.880344 s : HDCG: Pcg converged in 449 iterations and 2108.505208 s
grep onverg slurm-401663.out | grep HDCG
Grid : Message : 3900.817781 s : HDCG: Pcg converged in 476 iterations and 1992.591311 s
Grid : Message : 5647.202699 s : HDCG: Pcg converged in 306 iterations and 1746.838660 s
[paboyle@login2.crusher debug]$ grep converged slurm-401775.out | grep HDCG
Grid : Message : 3583.177025 s : HDCG: Pcg converged in 375 iterations and 1800.896037 s
Grid : Message : 5348.342243 s : HDCG: Pcg converged in 302 iterations and 1765.045018 s
Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead (Mirs, as in the BFM version).
*/
//
for(int l=0;l<los.size();l++){ for(int l=0;l<los.size();l++){
RealD lo = los[l]; RealD lo = los[l];
for(int o=0;o<ords.size();o++){ for(int o=0;o<ords.size();o++){
ConjugateGradient<CoarseVector> CGsloppy(5.0e-2,maxit,false); ConjugateGradient<CoarseVector> CGsloppy(4.0e-2,maxit,false);
HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser); HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case // ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,ords[o],FineHermOp); // 311 ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp); // 311
/*
* CG smooth 11 iter:
slurm-403825.out:Grid : Message : 4369.824339 s : HDCG: fPcg converged in 215 iterations 3.0
slurm-403908.out:Grid : Message : 3955.897470 s : HDCG: fPcg converged in 236 iterations 1.0
slurm-404273.out:Grid : Message : 3843.792191 s : HDCG: fPcg converged in 210 iterations 2.0
* CG smooth 9 iter:
*/
//
RealD MirsShift = lo;
ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
////////////////////////////////////////// //////////////////////////////////////////
// Build a HDCG solver // Build a HDCG solver
////////////////////////////////////////// //////////////////////////////////////////
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCG(1.0e-8, 100, HDCG(1.0e-8, 700,
FineHermOp, FineHermOp,
Smoother, // ChebySmooth,
CGsmooth,
HPDSolveSloppy, HPDSolveSloppy,
HPDSolve, HPDSolve,
Aggregates); Aggregates);
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace> /*
HDCGdefl(1.0e-8, 100, TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCGdefl(1.0e-8, 700,
FineHermOp, FineHermOp,
Smoother, Smoother,
cADEF1, cADEF1,
HPDSolve, HPDSolve,
Aggregates); Aggregates);
*/
// result=Zero(); // result=Zero();
// HDCGdefl(src,result); // HDCGdefl(src,result);