1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-15 02:05:37 +00:00
Grid/tests/lanczos/Test_dwf_compressed_lanczos.cc
2018-03-08 23:02:19 -05:00

714 lines
22 KiB
C++

/*
Authors: Christoph Lehner
Date: 2017
Multigrid Lanczos
TODO:
High priority:
- Explore filtering of the starting vector again; this should really work: if the Chebyshev polynomial is 4 in the low-mode region and 1 in the high-mode region, applying it 15 times
gives a 1e9 suppression of the high modes, which should already create the desired invariant subspace? Missing something here??? Maybe the dynamic range is dangerous, i.e., it could
also kill the interesting eigenrange if not careful.
Better: Use all Chebyshev polynomials up to order N in order to approximate a step function; try this! Problem: width of the step function. Can kill eigenspace > 1e-3 and have
< 1e-5 equal to 1
Low priority:
- Given that I seem to need many restarts and high degree poly to create the base and this takes about 1 day, seriously consider a simple method to create a basis
(ortho krylov low poly); and then fix up lowest say 200 eigenvalues by 1 run with high-degree poly (600 could be enough)
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/parallelIO/IldgIO.h>
/////////////////////////////////////////////////////////////////////////////
// The following are now decoupled from the Lanczos and deal with grids.
// Safe to replace functionality
/////////////////////////////////////////////////////////////////////////////
#include "BlockedGrid.h"
#include "FieldBasisVector.h"
#include "BlockProjector.h"
#include "FieldVectorIO.h"
#include "Params.h"
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
/**
 * Read a list of eigenvalues from a text file and make it available on every rank.
 *
 * The file is expected to hold the number of values N followed by N floating-point
 * numbers (the layout write_evals() in this file produces). Only the boss rank opens
 * and parses the file; all other ranks contribute zeros, and the GlobalSum /
 * GlobalSumVector calls then combine the contributions of all ranks.
 *
 * @param _grid  grid used for IsBoss() and for the global reductions
 * @param fn     path of the file to read
 * @param evals  resized to N and filled with the values read
 * @return false if the boss rank could not open the file (evals is left untouched),
 *         true otherwise
 */
bool read_evals(GridBase* _grid, char* fn, std::vector<RealD>& evals) {
  FILE* f = 0;
  uint32_t status = 0;
  if (_grid->IsBoss()) {
    f = fopen(fn,"rt");
    status = f ? 1 : 0;
  }
  // status is non-zero after the sum only if the boss managed to open the file
  _grid->GlobalSum(status);
  if (!status)
    return false;

  uint32_t N = 0;
  if (f) {
    // The fscanf calls must not live inside assert(): with NDEBUG the whole
    // expression is removed and nothing would be read from the file.
    // Parse into a plain unsigned int so the conversion specifier matches the type.
    unsigned int count = 0;
    const int matched = fscanf(f,"%u\n",&count);
    assert(matched==1);
    (void)matched;
    N = count;
  }
  _grid->GlobalSum(N);
  std::cout << "Reading " << N << " eigenvalues" << std::endl;

  evals.resize(N);
  for (uint32_t i=0;i<N;i++) {
    if (f) {
      const int matched = fscanf(f,"%lf",&evals[i]);
      assert(matched==1);
      (void)matched;
    } else {
      evals[i] = 0;
    }
  }
  // data() instead of &evals[0]: well-defined even when the file announced zero values
  _grid->GlobalSumVector(evals.data(),evals.size());

  if (f)
    fclose(f);
  return true;
}
// Write eigenvalues to the text file `fn`: the number of values on the first line,
// followed by one value per line in %.15E notation.
// Asserts that the file could be opened for writing.
void write_evals(char* fn, std::vector<RealD>& evals) {
  FILE* out = fopen(fn,"wt");
  assert(out);

  const int count = (int)evals.size();
  fprintf(out,"%d\n",count);

  int idx = 0;
  while (idx < count) {
    fprintf(out,"%.15E\n",evals[idx]);
    ++idx;
  }

  fclose(out);
}
// Write a history to the text file `fn`, one "<index> <value>" pair per line
// with the value in %.15E notation.
// Asserts that the file could be opened for writing.
void write_history(char* fn, std::vector<RealD>& hist) {
  FILE* out = fopen(fn,"wt");
  assert(out);

  const int count = (int)hist.size();
  int step = 0;
  while (step < count) {
    fprintf(out,"%d %.15E\n",step,hist[step]);
    ++step;
  }

  fclose(out);
}
/**
 * LinearFunction wrapper that stores every result of the wrapped operator in a
 * per-rank checkpoint file and replays stored results on a later run.
 *
 * Each rank uses the file "<dir>/ckpt_op.<rank>". A record is a 4-byte CRC32 followed
 * by the raw bytes of the output field. On each call, if the file still holds data
 * beyond the current position the next record is read into `out`; otherwise the
 * wrapped operator is applied and its result appended.
 *
 * When the number of real applications exceeds the limit (see operator()), the file is
 * closed and the process exits with code 3 so a later job can resume.
 *
 * NOTE: the class owns a raw FILE*; copying an instance would close the file twice.
 */
template<typename Field>
class CheckpointedLinearFunction : public LinearFunction<Field> {
public:
  LinearFunction<Field>& _op;      // wrapped operator
  std::string _dir;                // directory holding the checkpoint files
  int _max_apply;                  // limit on real operator applications in this run
  int _apply, _apply_actual;       // calls to operator() / calls that really ran _op
  GridBase* _grid;                 // used for ThisRank() when naming the file
  FILE* _f;                        // this rank's checkpoint file (read + write)

  /**
   * Create the checkpoint directory if needed and open (or create) this rank's file.
   *
   * @param grid       grid providing ThisRank()
   * @param op         operator to wrap
   * @param dir        checkpoint directory
   * @param max_apply  limit on real applications, see operator()
   */
  CheckpointedLinearFunction(GridBase* grid, LinearFunction<Field>& op, const char* dir,int max_apply) :
    // listed in declaration order, which is the order the members are initialised in anyway
    _op(op), _dir(dir), _max_apply(max_apply), _apply(0), _apply_actual(0), _grid(grid), _f(0) {

    FieldVectorIO::conditionalMkDir(dir);

    char fn[4096];
    // bounded formatting: a very long directory name must not overrun the buffer
    const int len = snprintf(fn,sizeof(fn),"%s/ckpt_op.%4.4d",_dir.c_str(),_grid->ThisRank());
    assert(len > 0 && (size_t)len < sizeof(fn));
    (void)len;
    printf("CheckpointLinearFunction:: file %s\n",fn);

    // open an existing file for update, otherwise create a new one
    _f = fopen(fn,"r+b");
    if (!_f)
      _f = fopen(fn,"w+b");
    assert(_f);
    fseek(_f,0,SEEK_CUR);
  }

  ~CheckpointedLinearFunction() {
    if (_f) {
      fclose(_f);
      _f = 0;
    }
  }

  /**
   * Try to read the next record from the checkpoint file into `out`.
   *
   * @param in   unused
   * @param out  receives the stored field data
   * @return false if the current file position is already at the end of the file,
   *         true if a record was read (read size and CRC are asserted)
   */
  bool load_ckpt(const Field& in, Field& out) {
    // compare the current position with the end of file to see whether data is left
    off_t cur = ftello(_f);
    fseeko(_f,0,SEEK_END);
    if (cur == ftello(_f))
      return false;
    fseeko(_f,cur,SEEK_SET);

    size_t sz = sizeof(out._odata[0]) * out._odata.size();

    GridStopWatch gsw;
    gsw.Start();
    // All I/O happens outside assert() so it is still performed when NDEBUG is defined.
    uint32_t crc_exp;
    size_t got = fread(&crc_exp,4,1,_f);
    assert(got==1);
    got = fread(&out._odata[0],sz,1,_f);
    assert(got==1);
    (void)got;
    const uint32_t crc = FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0);
    assert(crc==crc_exp);
    (void)crc;
    gsw.Stop();

    printf("CheckpointLinearFunction:: reading %lld\n",(long long)sz);
    std::cout << GridLogMessage << "Loading " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl;
    return true;
  }

  /**
   * Append `out` (preceded by its CRC32) to the checkpoint file.
   *
   * @param in   unused
   * @param out  field whose data is written
   */
  void save_ckpt(const Field& in, Field& out) {
    fseek(_f,0,SEEK_CUR); // switch to write

    size_t sz = sizeof(out._odata[0]) * out._odata.size();

    GridStopWatch gsw;
    gsw.Start();
    uint32_t crc = FieldVectorIO::crc32_threaded((unsigned char*)&out._odata[0],sz,0x0);
    // As in load_ckpt: keep the fwrite calls outside assert() so NDEBUG builds still write.
    size_t put = fwrite(&crc,4,1,_f);
    assert(put==1);
    put = fwrite(&out._odata[0],sz,1,_f);
    assert(put==1);
    (void)put;
    fflush(_f); // try this on the GPFS to suppress OPA usage for disk during dslash; this is not needed at Lustre/JLAB
    gsw.Stop();

    printf("CheckpointLinearFunction:: writing %lld\n",(long long)sz);
    std::cout << GridLogMessage << "Saving " << ((RealD)sz/1024./1024./1024.) << " GB in " << gsw.Elapsed() << std::endl;
  }

  /**
   * Produce out = op(in), taking the result from the checkpoint file when available.
   *
   * If the operator really had to be applied, the result is checkpointed. Because the
   * counter is compared before it is incremented, the run is terminated (exit code 3)
   * after _max_apply + 1 real applications.
   */
  void operator()(const Field& in, Field& out) {
    _apply++;

    if (load_ckpt(in,out))
      return;

    _op(in,out);
    save_ckpt(in,out);

    if (_apply_actual++ >= _max_apply) {
      std::cout << GridLogMessage << "Maximum application of operator reached, checkpoint and finish in future job" << std::endl;
      if (_f) { fclose(_f); _f=0; }
      in._grid->Barrier();
      Grid_finalize();
      exit(3);
    }
  }
};
/**
 * Coarse-space operator: promote the coarse vector to the fine grid, apply the
 * operator function `_poly` of `_Linop` there, and project the result back.
 * Prints the time spent in each of the three stages together with the relative
 * imaginary part of <in|out>.
 */
template<typename CoarseField,typename Field>
class ProjectedFunctionHermOp : public LinearFunction<CoarseField> {
public:
  OperatorFunction<Field> & _poly;
  LinearOperatorBase<Field> &_Linop;
  BlockProjector<Field>& _pr;

  ProjectedFunctionHermOp(BlockProjector<Field>& pr,OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
    : _poly(poly), _Linop(linop), _pr(pr) {}

  void operator()(const CoarseField& in, CoarseField& out) {
    assert(_pr._bgrid._o_blocks == in._grid->oSites());

    Field fine_src(_pr._bgrid._grid);
    Field fine_dst(_pr._bgrid._grid);
    GridStopWatch t_c2f,t_poly,t_f2c;

    // coarse -> fine
    t_c2f.Start();
    _pr.coarseToFine(in,fine_src);
    t_c2f.Stop();

    // operator function on the fine grid
    t_poly.Start();
    _poly(_Linop,fine_src,fine_dst);
    t_poly.Stop();

    // fine -> coarse
    t_f2c.Start();
    _pr.fineToCoarse(fine_dst,out);
    t_f2c.Stop();

    auto overlap = innerProduct(in,out);
    std::cout << GridLogMessage
              << "Operator timing details: c2f = " << t_c2f.Elapsed()
              << " poly = " << t_poly.Elapsed()
              << " f2c = " << t_f2c.Elapsed()
              << " Complimentary Hermiticity check: " << overlap.imag() / std::abs(overlap)
              << std::endl;
  }
};
/**
 * Coarse-space operator without a polynomial: promote the coarse vector to the fine
 * grid, apply _Linop.HermOp once, and project the result back to the coarse grid.
 */
template<typename CoarseField,typename Field>
class ProjectedHermOp : public LinearFunction<CoarseField> {
public:
  LinearOperatorBase<Field> &_Linop;
  BlockProjector<Field>& _pr;

  ProjectedHermOp(BlockProjector<Field>& pr,LinearOperatorBase<Field>& linop)
    : _Linop(linop), _pr(pr) {}

  void operator()(const CoarseField& in, CoarseField& out) {
    assert(_pr._bgrid._o_blocks == in._grid->oSites());

    Field fine_src(_pr._bgrid._grid);
    Field fine_dst(_pr._bgrid._grid);

    _pr.coarseToFine(in,fine_src);      // coarse -> fine
    _Linop.HermOp(fine_src,fine_dst);   // apply the operator on the fine grid
    _pr.fineToCoarse(fine_dst,out);     // fine -> coarse
  }
};
// Site type of a coarse field: a scalar wrapping a vector of N entries of type vtype.
template<typename vtype, int N > using CoarseSiteFieldGeneral = iScalar< iVector<vtype, N> >;
// Variants of the site type for vComplexD, vComplexF and vComplex entries.
template<int N> using CoarseSiteFieldD = CoarseSiteFieldGeneral< vComplexD, N >;
template<int N> using CoarseSiteFieldF = CoarseSiteFieldGeneral< vComplexF, N >;
template<int N> using CoarseSiteField = CoarseSiteFieldGeneral< vComplex, N >;
// Lattice fields built from the site types above (vComplex and vComplexD entries).
template<int N> using CoarseLatticeFermion = Lattice< CoarseSiteField<N> >;
template<int N> using CoarseLatticeFermionD = Lattice< CoarseSiteFieldD<N> >;
/**
 * Second Lanczos round, carried out in the blocked (coarse) space defined by `pr`.
 *
 * Template parameter Nstop1 is the size of the basis and therefore the length of the
 * coarse site vector (CoarseLatticeFermion<Nstop1>).
 *
 * Steps, in the order they appear below:
 *  1. Build the 4d and 5d coarse grids from the block counts in pr._bgrid.
 *  2. Set up the coarse operator: Chebyshev(alpha2,beta,Npoly2) of HermOp applied through
 *     the projector. If MaxApply2 is non-zero the checkpointing wrapper is used.
 *  3. Print a coarse -> fine -> coarse round-trip precision test.
 *  4. Try to read an earlier result from "lanczos.output". If any part cannot be read,
 *     run IRL2.calc, reconstruct eigenvalues with Cheb2.approxInv and write the
 *     compressed vectors and the three eigenvalue files.
 *  5. If smoothed_eval_enabled and no ".smoothed" eigenvalue file could be read:
 *     recompute eval3[i] for i in [smoothed_eval_begin, smoothed_eval_end) after
 *     smoothed_eval_outer CG applications, and write the result.
 *  6. If cg_test_enabled: run four CG solves (undeflated and three deflated variants).
 *
 * eval1 is both an input (used as first guess when reconstructing eigenvalues) and an
 * output (overwritten when the ".linear" eigenvalue file is read).
 *
 * NOTE(review): SkipTest2 and the local Op2nopoly are not used in this function.
 * NOTE(review): the coarse grids are obtained as raw pointers and never freed here.
 */
template<typename Field,int Nstop1>
void CoarseGridLanczos(BlockProjector<Field>& pr,RealD alpha2,RealD beta,int Npoly2,
int Nstop2,int Nk2,int Nm2,RealD resid2,RealD betastp2,int MaxIt,int MinRes2,
LinearOperatorBase<Field>& HermOp, std::vector<RealD>& eval1, bool cg_test_enabled,
int cg_test_maxiter,int nsingle,int SkipTest2, int MaxApply2,bool smoothed_eval_enabled,
int smoothed_eval_inner,int smoothed_eval_outer,int smoothed_eval_begin,
int smoothed_eval_end,RealD smoothed_eval_inner_resid) {
BlockedGrid<Field>& bgrid = pr._bgrid;
BasisFieldVector<Field>& basis = pr._evec;
// Coarse 4d lattice extent: _nb[1+i] times the processor count in the same direction
// (presumably _nb holds the per-rank block count -- verify against BlockedGrid).
std::vector<int> coarseFourDimLatt;
for (int i=0;i<4;i++)
coarseFourDimLatt.push_back(bgrid._nb[1+i] * bgrid._grid->_processors[1+i]);
// Direction 0 must not be distributed over processors.
assert(bgrid._grid->_processors[0] == 1);
std::cout << GridLogMessage << "CoarseGrid = " << coarseFourDimLatt << " with basis = " << Nstop1 << std::endl;
GridCartesian * UCoarseGrid = SpaceTimeGrid::makeFourDimGrid(coarseFourDimLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * FCoarseGrid = SpaceTimeGrid::makeFiveDimGrid(bgrid._nb[0],UCoarseGrid);
Chebyshev<Field> Cheb2(alpha2,beta,Npoly2);
CoarseLatticeFermion<Nstop1> src_coarse(FCoarseGrid);
// Second round of Lanczos in blocked space
// eval2: eigenvalues from the coarse Lanczos (later overwritten with reconstructed values)
// eval3: mixed set that is written to eigen-values.txt
std::vector<RealD> eval2(Nm2);
std::vector<RealD> eval3(Nm2);
BasisFieldVector<CoarseLatticeFermion<Nstop1> > coef(Nm2,FCoarseGrid);
ProjectedFunctionHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2plain(pr,Cheb2,HermOp);
// Note: constructing Op2ckpt already creates/opens the files in "checkpoint",
// whether or not it ends up being selected below.
CheckpointedLinearFunction<CoarseLatticeFermion<Nstop1> > Op2ckpt(src_coarse._grid,Op2plain,"checkpoint",MaxApply2);
LinearFunction< CoarseLatticeFermion<Nstop1> >* Op2;
// MaxApply2 != 0 selects the checkpointed operator
if (MaxApply2) {
Op2 = &Op2ckpt;
} else {
Op2 = &Op2plain;
}
ProjectedHermOp<CoarseLatticeFermion<Nstop1>,LatticeFermion> Op2nopoly(pr,HermOp);
// The same operator is passed for both operator arguments of the Lanczos
ImplicitlyRestartedLanczos<CoarseLatticeFermion<Nstop1> > IRL2(*Op2,*Op2,Nstop2,Nk2,Nm2,resid2,MaxIt,betastp2,MinRes2);
src_coarse = 1.0;
// Precision test
// Map a constant coarse vector to the fine grid and back and print the relative
// squared norm of the difference to the original.
{
Field tmp(bgrid._grid);
CoarseLatticeFermion<Nstop1> tmp2(FCoarseGrid);
CoarseLatticeFermion<Nstop1> tmp3(FCoarseGrid);
tmp2 = 1.0;
tmp3 = 1.0;
pr.coarseToFine(tmp2,tmp);
pr.fineToCoarse(tmp,tmp2);
tmp2 -= tmp3;
std::cout << GridLogMessage << "Precision Test c->f->c: " << norm2(tmp2) / norm2(tmp3) << std::endl;
//bgrid._grid->Barrier();
//return;
}
int Nconv;
// Reuse a previous result if the compressed vectors and all three eigenvalue files
// can be read; otherwise compute everything from scratch. The || chain short-circuits,
// so later files are not read once one read has failed.
if (!FieldVectorIO::read_compressed_vectors("lanczos.output",pr,coef) ||
!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt",eval3) ||
!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.linear",eval1) ||
!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.poly",eval2)
) {
IRL2.calc(eval2,coef._v,src_coarse,Nconv,true);
// keep only the first Nstop2 vectors / values
coef.resize(Nstop2);
eval2.resize(Nstop2);
eval3.resize(Nstop2);
std::vector<Field> step3_cache;
// reconstruct eigenvalues of original operator
// First guess for entry i: eval1[i] for i < Nstop1, otherwise the already
// reconstructed previous entry eval2[i-1].
for (int i=0;i<Nstop2;i++){
RealD eval2_linear;
if (i<Nstop1) {
eval2_linear = eval1[i];
} else {
eval2_linear = eval2[i-1];
}
RealD eval2_poly = eval2[i];
RealD eval_reconstruct = Cheb2.approxInv(eval2_poly,eval2_linear,100,1e-10);
std::cout << i << " Reconstructed eval = " << eval_reconstruct << " from quess " << eval2_linear << std::endl;
eval2[i] = eval_reconstruct;
}
// as demonstrated in CG test below, best result from mixed determination
// i.e. eval1 for the first Nstop1 entries and the reconstructed eval2 for the rest
for (int i=0;i<Nstop2;i++)
eval3[i] = (i < Nstop1) ? eval1[i] : eval2[i];
for(int i=0;i<Nstop2;i++){
std::cout << i<<" / "<< Nstop2<< " eigenvalue "<< eval3[i] <<std::endl;
};
// write
// mkdir is called on every rank and its return value is ignored;
// the eigenvalue text files are written by the boss rank only.
mkdir("lanczos.output",ACCESSPERMS);
FieldVectorIO::write_compressed_vectors("lanczos.output",pr,coef,nsingle);
if (bgrid._grid->IsBoss()) {
write_evals((char *)"lanczos.output/eigen-values.txt",eval3);
write_evals((char *)"lanczos.output/eigen-values.txt.linear",eval1);
write_evals((char *)"lanczos.output/eigen-values.txt.poly",eval2);
}
}
// fix up eigenvalues
// read_evals is evaluated first, so an existing ".smoothed" file replaces eval3 even
// when smoothed_eval_enabled is false; the smoothing itself runs only if the file is
// missing and smoothing is enabled.
if (!read_evals(UCoarseGrid,(char *)"lanczos.output/eigen-values.txt.smoothed",eval3) && smoothed_eval_enabled) {
ConjugateGradient<LatticeFermion> CG(smoothed_eval_inner_resid, smoothed_eval_inner, false);
LatticeFermion v_i(basis[0]._grid);
auto tmp = v_i;
auto tmp2 = v_i;
// NOTE(review): the loop bounds are not checked against the sizes of coef and eval3.
for (int i=smoothed_eval_begin;i<smoothed_eval_end;i++) {
GridStopWatch gsw;
gsw.Start();
pr.coarseToFine(coef[i],v_i);
v_i.checkerboard = Odd;
// each outer iteration: solve with CG from a zero guess, then normalise the solution
for (int j=0;j<smoothed_eval_outer;j++) {
tmp=zero;
//pr.deflate(coef,eval3,Nstop2,v_i,tmp);
CG(HermOp, v_i, tmp);
v_i = 1.0 / ::sqrt( norm2(tmp) ) * tmp;
}
// new eigenvalue estimate: real part of <v|HermOp v> for the normalised v
tmp = v_i;
HermOp.HermOp(tmp,tmp2);
RealD ev = innerProduct(tmp,tmp2).real();
gsw.Stop();
std::cout << GridLogMessage << "Smoothed eigenvalue " << i << " from " << eval3[i] << " to " << ev << " in " << gsw.Elapsed() << std::endl;
// " with effective smoother precision " << (CG.ResHistory.back() / CG.ResHistory.front() ) << std::endl;
// CG.ResHistory.clear();
eval3[i] = ev;
}
if (bgrid._grid->IsBoss()) {
write_evals((char *)"lanczos.output/eigen-values.txt.smoothed",eval3);
write_evals((char *)"lanczos.output/eigen-values.txt",eval3); // also reset this to the best ones we have available
}
}
// do CG test with and without deflation
// All four solves use the same normalised constant source on the Odd checkerboard,
// a tolerance of 1.0e-8 and at most cg_test_maxiter iterations.
if (cg_test_enabled) {
ConjugateGradient<LatticeFermion> CG(1.0e-8, cg_test_maxiter, false);
LatticeFermion src_orig(bgrid._grid);
src_orig.checkerboard = Odd;
src_orig = 1.0;
src_orig = src_orig * (1.0 / ::sqrt(norm2(src_orig)) );
auto result = src_orig;
// undeflated solve
std::cout << GridLogMessage << " Undeflated solve "<<std::endl;
result = zero;
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.undefl",CG.ResHistory);
// CG.ResHistory.clear();
// deflated solve with all eigenvectors
// (Nstop2 vectors, reconstructed eigenvalues eval2)
std::cout << GridLogMessage << " Deflated solve with all evectors"<<std::endl;
result = zero;
pr.deflate(coef,eval2,Nstop2,src_orig,result);
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.defl_all",CG.ResHistory);
// CG.ResHistory.clear();
// deflated solve with non-blocked eigenvectors
// (first Nstop1 vectors, eigenvalues eval1)
std::cout << GridLogMessage << " Deflated solve with non-blocked evectors"<<std::endl;
result = zero;
pr.deflate(coef,eval1,Nstop1,src_orig,result);
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.defl_full",CG.ResHistory);
// CG.ResHistory.clear();
// deflated solve with all eigenvectors and original eigenvalues from proj
// (Nstop2 vectors, eigenvalues eval3)
std::cout << GridLogMessage << " Deflated solve with all eigenvectors and original eigenvalues from proj"<<std::endl;
result = zero;
pr.deflate(coef,eval3,Nstop2,src_orig,result);
CG(HermOp, src_orig, result);
// if (UCoarseGrid->IsBoss())
// write_history("cg_test.defl_all_ev3",CG.ResHistory);
// CG.ResHistory.clear();
}
}
/**
 * Fill evec[0..Nstop-1] by repeated operator application: starting from src, each
 * step applies Op to the previous vector, passes the result to evec.orthogonalize()
 * with the current index, normalises it and stores it as the next basis vector.
 * The time of every operator application is printed.
 */
template<typename Field>
void quick_krylov_basis(BasisFieldVector<Field>& evec,Field& src,LinearFunction<Field>& Op,int Nstop) {
  Field current = src;
  Field next = current;

  int k = 0;
  while (k < Nstop) {
    GridStopWatch op_timer;
    op_timer.Start();
    Op(current,next);
    op_timer.Stop();

    evec.orthogonalize(next,k);

    // scale to unit norm
    const RealD len = Grid::sqrt(norm2(next));
    next = next * (1.0/len);

    evec[k] = next;
    current = next;

    std::cout << GridLogMessage << "Quick_krylov_basis: " << k << "/" << Nstop << " timing of operator=" << op_timer.Elapsed() << std::endl;
    ++k;
  }
}
/**
 * Driver of the two-stage Lanczos.
 *
 * Sequence:
 *  1. Declare the run parameters and register them with Params("params.txt") via PADD
 *     (presumably this reads each value from the file -- see Params.h).
 *  2. Build the grids, read the gauge configuration and set up the ZMobius operator.
 *  3. Optional benchmark (max_cheb_time_ms != 0): returns 2 if one Chebyshev application
 *     takes longer than max_cheb_time_ms.
 *  4. First stage: read the basis from "checkpoint", or compute it (simple Krylov basis or
 *     implicitly restarted Lanczos); returns 0 afterwards if exit_after_basis_calculation.
 *  5. Print eigenvalue/residual checks for the basis vectors (skipped for the Krylov basis).
 *  6. If Nstop1 <= Nstop2: block the basis and dispatch to CoarseGridLanczos<.., Nstop1>,
 *     which is only instantiated for the sizes listed in common_basis_sizes.
 *
 * NOTE(review): `tag`, the HP grids, and in the eigenvector test `tmp` and `vv0` are unused.
 * NOTE(review): the grids are obtained as raw pointers and never freed.
 */
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
const int MaxIt = 10000;
int Ls;
RealD mass;
RealD M5;
std::vector < std::complex<double> > omega;
RealD alpha1, alpha2, beta;
int Npoly1, Npoly2;
int Nstop1, Nstop2;
int Nk1, Nk2;
int Np1, Np2;
int MinRes1, MinRes2;
int SkipTest2, MaxApply2;
bool checkpoint_basis;
bool cg_test_enabled;
bool exit_after_basis_calculation;
bool simple_krylov_basis;
int cg_test_maxiter;
int nsingle; // store in single precision, the rest in FP16
int max_cheb_time_ms;
bool smoothed_eval_enabled;
int smoothed_eval_inner;
int smoothed_eval_outer;
int smoothed_eval_begin;
int smoothed_eval_end;
RealD smoothed_eval_inner_resid;
// vector representation
std::vector<int> block_size; // 5d block size
RealD resid1, resid2, betastp1, betastp2, basis_norm_threshold;
std::string config;
// Register all parameters; basis_norm_threshold is fixed to 1e-5 instead of being read.
Params jp("params.txt");
PADD(jp,Npoly1); PADD(jp,Npoly2);
PADD(jp,max_cheb_time_ms);
PADD(jp,Nstop1); PADD(jp,Nstop2); PADD(jp,MaxApply2);
PADD(jp,Nk1); PADD(jp,Nk2); PADD(jp,betastp1); PADD(jp,betastp2);
PADD(jp,Np1); PADD(jp,Np2); basis_norm_threshold = 1e-5; //PADD(jp,basis_norm_threshold);
PADD(jp,block_size); PADD(jp,smoothed_eval_enabled); PADD(jp,smoothed_eval_inner);
PADD(jp,resid1); PADD(jp,resid2); PADD(jp,smoothed_eval_outer);
PADD(jp,alpha1); PADD(jp,alpha2); PADD(jp,smoothed_eval_begin);
PADD(jp,MinRes1); PADD(jp,MinRes2); PADD(jp,smoothed_eval_end);
PADD(jp,beta); PADD(jp,mass); PADD(jp,smoothed_eval_inner_resid);
PADD(jp,omega); PADD(jp,config);
PADD(jp,M5); PADD(jp,cg_test_enabled);
PADD(jp,cg_test_maxiter); PADD(jp,checkpoint_basis);
PADD(jp,nsingle); PADD(jp,exit_after_basis_calculation);
PADD(jp,simple_krylov_basis); PADD(jp,SkipTest2);
// The fifth-dimension extent is the number of omega coefficients
Ls = (int)omega.size();
// Grids
// (the *HP variants use the vComplexD SIMD layout instead of vComplex)
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * UGridHP = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridRedBlackCartesian * UrbGridHP = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridHP);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridCartesian * FGridHP = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridHP);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGridHP = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridHP);
// Gauge field
// read from the file named by the `config` parameter
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
NerscIO::readConfiguration(Umu,header,config);
std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
<< " Ls: " << Ls << std::endl;
// ZMobius EO Operator
ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, omega,1.,0.);
SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);
// Eigenvector storage
const int Nm1 = Np1 + Nk1;
const int Nm2 = Np2 + Nk2; // maximum number of vectors we need to keep
std::cout << GridLogMessage << "Keep " << Nm1 << " full vectors" << std::endl;
std::cout << GridLogMessage << "Keep " << Nm2 << " total vectors" << std::endl;
assert(Nm2 >= Nm1);
BasisFieldVector<LatticeFermion> evec(Nm1,FrbGrid); // start off with keeping full vectors
// First and second cheby
// (only the first one is built here; the second is created inside CoarseGridLanczos)
Chebyshev<LatticeFermion> Cheb1(alpha1,beta,Npoly1);
FunctionHermOp<LatticeFermion> Op1(Cheb1,HermOp);
PlainHermOp<LatticeFermion> Op1test(HermOp);
// Eigenvalue storage
std::vector<RealD> eval1(evec.size());
// Construct source vector
// constant field on the Odd checkerboard, scaled to unit norm
LatticeFermion src(FrbGrid);
{
src=1.0;
src.checkerboard = Odd;
// normalize
RealD nn = norm2(src);
nn = Grid::sqrt(nn);
src = src * (1.0/nn);
}
// Do a benchmark and a quick exit if performance is too little (ugly but needed due to performance fluctuations)
// max_cheb_time_ms == 0 disables the benchmark
if (max_cheb_time_ms) {
// one round of warmup
auto tmp = src;
GridStopWatch gsw1,gsw2;
gsw1.Start();
Cheb1(HermOp,src,tmp);
gsw1.Stop();
// second, timed application with fresh operator counters
Ddwf.ZeroCounters();
gsw2.Start();
Cheb1(HermOp,src,tmp);
gsw2.Stop();
Ddwf.Report();
std::cout << GridLogMessage << "Performance check; warmup = " << gsw1.Elapsed() << " test = " << gsw2.Elapsed() << std::endl;
int ms = (int)(gsw2.useconds()/1e3);
if (ms > max_cheb_time_ms) {
std::cout << GridLogMessage << "Performance too poor: " << ms << " ms, cutoff = " << max_cheb_time_ms << " ms" << std::endl;
Grid_finalize();
return 2;
}
}
// First round of Lanczos to get low mode basis
ImplicitlyRestartedLanczos<LatticeFermion> IRL1(Op1,Op1test,Nstop1,Nk1,Nm1,resid1,MaxIt,betastp1,MinRes1);
int Nconv;
char tag[1024];
// Compute the basis only if it cannot be loaded from the "checkpoint" directory
if (!FieldVectorIO::read_argonne(evec,(char *)"checkpoint") || !read_evals(UGrid,(char *)"checkpoint/eigen-values.txt",eval1)) {
if (simple_krylov_basis) {
quick_krylov_basis(evec,src,Op1,Nstop1);
} else {
IRL1.calc(eval1,evec._v,src,Nconv,false);
}
evec.resize(Nstop1); // and throw away superfluous
eval1.resize(Nstop1);
// vectors are written through write_argonne; the eigenvalue file is written by the boss rank only
if (checkpoint_basis)
FieldVectorIO::write_argonne(evec,(char *)"checkpoint");
if (UGrid->IsBoss() && checkpoint_basis)
write_evals((char *)"checkpoint/eigen-values.txt",eval1);
Ddwf.Report();
if (exit_after_basis_calculation) {
Grid_finalize();
return 0;
}
}
// now test eigenvectors
// For each basis vector B print the Rayleigh quotient <B|HermOp B>/<B|B>, the stored
// eigenvalue, the squared norm of the residual and the squared norm of B.
if (!simple_krylov_basis) {
for (int i=0;i<Nstop1;i++){
auto B = evec[i];
auto tmp = B;
auto v = B;
{
HermOp.HermOp(B,v);
RealD vnum = real(innerProduct(B,v)); // HermOp.
RealD vden = norm2(B);
RealD vv0 = norm2(v);
RealD eval2 = vnum/vden;
v -= eval2*B;
RealD vv = norm2(v);
std::cout << i << " OP eval = " << eval2 << " (" << eval1[i] << ") "
<< "res2 = " << vv << " norm2 = " << norm2(B) << std::endl;
}
}
}
// do second step only if needed
if (Nstop1 <= Nstop2) {
// Now setup blocking
assert(evec.size() == Nstop1);
BlockedGrid<LatticeFermion> bgrid(FrbGrid, block_size);
BlockProjector<LatticeFermion> pr(evec,bgrid);
pr.createOrthonormalBasis(basis_norm_threshold);
pr.createOrthonormalBasis(basis_norm_threshold); // another round due to precision issues created by local coherence
// The basis size is a template argument of CoarseGridLanczos, so every supported
// value needs its own case; BASIS(n) expands to that case for common_basis_sizes[n].
// NOTE(review): the BASIS macro is never #undef'd.
constexpr int common_basis_sizes[] = { 60, 250, 400 };
constexpr int n_common_basis_sizes = sizeof(common_basis_sizes) / sizeof(common_basis_sizes[0]);
switch (Nstop1) {
#define BASIS(n) case common_basis_sizes[n]:\
CoarseGridLanczos<LatticeFermion,common_basis_sizes[n]>\
(pr,alpha2,beta,Npoly2,Nstop2,Nk2,Nm2,resid2,betastp2,MaxIt,MinRes2,HermOp,eval1, \
cg_test_enabled,cg_test_maxiter,nsingle,SkipTest2, \
MaxApply2,smoothed_eval_enabled,smoothed_eval_inner,smoothed_eval_outer, \
smoothed_eval_begin,smoothed_eval_end,smoothed_eval_inner_resid); break;
BASIS(0);
BASIS(1);
BASIS(2);
// Unsupported basis size: only a message is printed, then the program finishes normally.
default:
std::cout << GridLogMessage << "Basis size " << Nstop1 << " must be added at compile-time" << std::endl;
std::cout << GridLogMessage << "Currently available sizes: " << std::endl;
for (int i=0;i<n_common_basis_sizes;i++) {
std::cout << GridLogMessage << " " << common_basis_sizes[i] << std::endl;
}
}
}
Grid_finalize();
}