mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Compare commits
3 Commits
b7c7000d0d
...
25f71913b7
Author | SHA1 | Date | |
---|---|---|---|
|
25f71913b7 | ||
|
34ddd2b7b1 | ||
|
d5fd90b2f3 |
@ -69,279 +69,339 @@ class TwoLevelCG : public LinearFunction<Field>
|
|||||||
|
|
||||||
virtual void operator() (const Field &src, Field &x)
|
virtual void operator() (const Field &src, Field &x)
|
||||||
{
|
{
|
||||||
#if 0
|
std::cout << GridLogMessage<<"HDCG: fPcg starting"<<std::endl;
|
||||||
Field resid(grid);
|
|
||||||
RealD f;
|
RealD f;
|
||||||
RealD rtzp,rtz,a,d,b;
|
RealD rtzp,rtz,a,d,b;
|
||||||
RealD rptzp;
|
RealD rptzp;
|
||||||
|
|
||||||
Field p(grid);
|
/////////////////////////////
|
||||||
|
// Set up history vectors
|
||||||
|
/////////////////////////////
|
||||||
|
int mmax = 5;
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocating"<<std::endl;
|
||||||
|
std::vector<Field> p(mmax,grid);
|
||||||
|
std::vector<Field> mmp(mmax,grid);
|
||||||
|
std::vector<RealD> pAp(mmax);
|
||||||
Field z(grid);
|
Field z(grid);
|
||||||
Field tmp(grid);
|
Field tmp(grid);
|
||||||
Field mmp(grid);
|
Field mp (grid);
|
||||||
Field r (grid);
|
Field r (grid);
|
||||||
Field mu (grid);
|
Field mu (grid);
|
||||||
Field rp (grid);
|
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocated"<<std::endl;
|
||||||
//Initial residual computation & set up
|
//Initial residual computation & set up
|
||||||
double tn;
|
RealD guess = norm2(x);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg guess nrm "<<guess<<std::endl;
|
||||||
|
RealD src_nrm = norm2(src);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg src nrm "<<src_nrm<<std::endl;
|
||||||
|
|
||||||
|
if ( src_nrm == 0.0 ) {
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg given trivial source norm "<<src_nrm<<std::endl;
|
||||||
|
x=Zero();
|
||||||
|
}
|
||||||
|
RealD tn;
|
||||||
|
|
||||||
GridStopWatch HDCGTimer;
|
GridStopWatch HDCGTimer;
|
||||||
HDCGTimer.Start();
|
HDCGTimer.Start();
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
// x0 = Vstart -- possibly modify guess
|
// x0 = Vstart -- possibly modify guess
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
x=Zero();
|
|
||||||
Vstart(x,src);
|
Vstart(x,src);
|
||||||
|
|
||||||
// r0 = b -A x0
|
// r0 = b -A x0
|
||||||
_FineLinop.HermOp(x,mmp);
|
_FineLinop.HermOp(x,mmp[0]);
|
||||||
|
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
|
||||||
axpy(r, -1.0, mmp, src); // Recomputes r=src-x0
|
{
|
||||||
rp=r;
|
double n1 = norm2(x);
|
||||||
|
double n2 = norm2(mmp[0]);
|
||||||
|
double n3 = norm2(r);
|
||||||
|
std::cout<<GridLogMessage<<"x,vstart,r = "<<n1<<" "<<n2<<" "<<n3<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
// Compute z = M1 x
|
// Compute z = M1 x
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
PcgM1(r,z);
|
PcgM1(r,z);
|
||||||
rtzp =real(innerProduct(r,z));
|
rtzp =real(innerProduct(r,z));
|
||||||
|
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
// Except Def2, M2 is trivial
|
// Solve for Mss mu = P A z and set p = z-mu
|
||||||
|
// Def2 p = 1 - Q Az = Pright z
|
||||||
|
// Other algos M2 is trivial
|
||||||
///////////////////////////////////////
|
///////////////////////////////////////
|
||||||
p=z;
|
PcgM2(z,p[0]);
|
||||||
|
|
||||||
RealD ssq = norm2(src);
|
RealD ssq = norm2(src);
|
||||||
RealD rsq = ssq*Tolerance*Tolerance;
|
RealD rsq = ssq*Tolerance*Tolerance;
|
||||||
|
|
||||||
GRID_TRACE("MultiGrid TwoLevel ");
|
std::cout << GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" rsq "<<rsq<<"\n";
|
||||||
std::cout<<GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" target rsq "<<rsq<<" ssq "<<ssq<<std::endl;
|
|
||||||
|
Field pp(grid);
|
||||||
|
|
||||||
|
for (int k=0;k<=MaxIterations;k++){
|
||||||
|
|
||||||
for (int k=1;k<=MaxIterations;k++){
|
int peri_k = k % mmax;
|
||||||
|
int peri_kp = (k+1) % mmax;
|
||||||
|
|
||||||
rtz=rtzp;
|
rtz=rtzp;
|
||||||
d= PcgM3(p,mmp);
|
d= PcgM3(p[peri_k],mmp[peri_k]);
|
||||||
a = rtz/d;
|
a = rtz/d;
|
||||||
|
|
||||||
|
// Memorise this
|
||||||
|
pAp[peri_k] = d;
|
||||||
|
|
||||||
|
axpy(x,a,p[peri_k],x);
|
||||||
|
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
|
||||||
|
|
||||||
axpy(x,a,p,x);
|
// Compute z = M x
|
||||||
RealD rn = axpy_norm(r,-a,mmp,r);
|
|
||||||
|
|
||||||
PcgM1(r,z);
|
PcgM1(r,z);
|
||||||
|
|
||||||
rtzp =real(innerProduct(r,z));
|
{
|
||||||
|
RealD n1,n2;
|
||||||
int ipcg=1; // almost free inexact preconditioned CG
|
n1=norm2(r);
|
||||||
if (ipcg) {
|
n2=norm2(z);
|
||||||
rptzp =real(innerProduct(rp,z));
|
std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : vector r,z "<<n1<<" "<<n2<<"\n";
|
||||||
} else {
|
|
||||||
rptzp =0;
|
|
||||||
}
|
}
|
||||||
b = (rtzp-rptzp)/rtz;
|
rtzp =real(innerProduct(r,z));
|
||||||
|
std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : inner rtzp "<<rtzp<<"\n";
|
||||||
|
|
||||||
|
// PcgM2(z,p[0]);
|
||||||
PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
||||||
|
|
||||||
|
p[peri_kp]=mu;
|
||||||
|
|
||||||
axpy(p,b,p,mu); // mu = A r
|
// Standard search direction p -> z + b p
|
||||||
|
b = (rtzp)/rtz;
|
||||||
|
|
||||||
|
int northog;
|
||||||
|
// k=zero <=> peri_kp=1; northog = 1
|
||||||
|
// k=1 <=> peri_kp=2; northog = 2
|
||||||
|
// ... ... ...
|
||||||
|
// k=mmax-2<=> peri_kp=mmax-1; northog = mmax-1
|
||||||
|
// k=mmax-1<=> peri_kp=0; northog = 1
|
||||||
|
|
||||||
|
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
|
||||||
|
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : orthogonalising to last "<<northog<<" vectors\n";
|
||||||
|
for(int back=0; back < northog; back++){
|
||||||
|
int peri_back = (k-back)%mmax;
|
||||||
|
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
|
||||||
|
RealD beta = -pbApk/pAp[peri_back];
|
||||||
|
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
|
||||||
|
}
|
||||||
|
|
||||||
RealD rrn=sqrt(rn/ssq);
|
RealD rrn=sqrt(rn/ssq);
|
||||||
RealD rtn=sqrt(rtz/ssq);
|
RealD rtn=sqrt(rtz/ssq);
|
||||||
std::cout<<GridLogMessage<<"HDCG: Pcg k= "<<k<<" residual = "<<rrn<<std::endl;
|
RealD rtnp=sqrt(rtzp/ssq);
|
||||||
|
|
||||||
if ( ipcg ) {
|
std::cout<<GridLogMessage<<"HDCG: fPcg k= "<<k<<" residual = "<<rrn<<"\n";
|
||||||
axpy(rp,0.0,r,r);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stopping condition
|
// Stopping condition
|
||||||
if ( rn <= rsq ) {
|
if ( rn <= rsq ) {
|
||||||
|
|
||||||
HDCGTimer.Stop();
|
HDCGTimer.Stop();
|
||||||
std::cout<<GridLogMessage<<"HDCG: Pcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
|
std::cout<<GridLogMessage<<"HDCG: fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
|
||||||
|
|
||||||
_FineLinop.HermOp(x,mmp);
|
_FineLinop.HermOp(x,mmp[0]);
|
||||||
axpy(tmp,-1.0,src,mmp);
|
axpy(tmp,-1.0,src,mmp[0]);
|
||||||
|
|
||||||
RealD mmpnorm = sqrt(norm2(mmp));
|
RealD mmpnorm = sqrt(norm2(mmp[0]));
|
||||||
RealD xnorm = sqrt(norm2(x));
|
RealD xnorm = sqrt(norm2(x));
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
RealD srcnorm = sqrt(norm2(src));
|
||||||
RealD tmpnorm = sqrt(norm2(tmp));
|
RealD tmpnorm = sqrt(norm2(tmp));
|
||||||
RealD true_residual = tmpnorm/srcnorm;
|
RealD true_residual = tmpnorm/srcnorm;
|
||||||
std::cout<<GridLogMessage
|
std::cout<<GridLogMessage
|
||||||
<<"HDCG: true residual is "<<true_residual
|
|
||||||
<<" solution "<<xnorm
|
|
||||||
<<" source "<<srcnorm
|
|
||||||
<<" mmp "<<mmpnorm
|
|
||||||
<<std::endl;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
std::cout<<GridLogMessage<<"HDCG: not converged"<<std::endl;
|
|
||||||
RealD xnorm = sqrt(norm2(x));
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
|
||||||
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
|
|
||||||
|
|
||||||
return ;
|
|
||||||
#else
|
|
||||||
RealD f;
|
|
||||||
RealD rtzp,rtz,a,d,b;
|
|
||||||
RealD rptzp;
|
|
||||||
|
|
||||||
/////////////////////////////
|
|
||||||
// Set up history vectors
|
|
||||||
/////////////////////////////
|
|
||||||
int mmax = 20;
|
|
||||||
std::vector<Field> p(mmax,grid);
|
|
||||||
std::vector<Field> mmp(mmax,grid);
|
|
||||||
std::vector<RealD> pAp(mmax);
|
|
||||||
Field z(grid);
|
|
||||||
Field tmp(grid);
|
|
||||||
Field mp (grid);
|
|
||||||
Field r (grid);
|
|
||||||
Field mu (grid);
|
|
||||||
|
|
||||||
//Initial residual computation & set up
|
|
||||||
RealD guess = norm2(x);
|
|
||||||
RealD src_nrm = norm2(src);
|
|
||||||
|
|
||||||
if ( src_nrm == 0.0 ) {
|
|
||||||
std::cout << GridLogMessage<<"HDCG: fPcg given trivial source norm "<<src_nrm<<std::endl;
|
|
||||||
x=Zero();
|
|
||||||
}
|
|
||||||
RealD tn;
|
|
||||||
|
|
||||||
GridStopWatch HDCGTimer;
|
|
||||||
HDCGTimer.Start();
|
|
||||||
//////////////////////////
|
|
||||||
// x0 = Vstart -- possibly modify guess
|
|
||||||
//////////////////////////
|
|
||||||
Vstart(x,src);
|
|
||||||
|
|
||||||
// r0 = b -A x0
|
|
||||||
_FineLinop.HermOp(x,mmp[0]);
|
|
||||||
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
|
|
||||||
{
|
|
||||||
double n1 = norm2(x);
|
|
||||||
double n2 = norm2(mmp[0]);
|
|
||||||
double n3 = norm2(r);
|
|
||||||
std::cout<<GridLogMessage<<"x,vstart,r = "<<n1<<" "<<n2<<" "<<n3<<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////
|
|
||||||
// Compute z = M1 x
|
|
||||||
//////////////////////////////////
|
|
||||||
PcgM1(r,z);
|
|
||||||
rtzp =real(innerProduct(r,z));
|
|
||||||
|
|
||||||
///////////////////////////////////////
|
|
||||||
// Solve for Mss mu = P A z and set p = z-mu
|
|
||||||
// Def2: p = 1 - Q Az = Pright z
|
|
||||||
// Other algos M2 is trivial
|
|
||||||
///////////////////////////////////////
|
|
||||||
PcgM2(z,p[0]);
|
|
||||||
|
|
||||||
RealD ssq = norm2(src);
|
|
||||||
RealD rsq = ssq*Tolerance*Tolerance;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage<<"HDCG: k=0 residual "<<rtzp<<" rsq "<<rsq<<"\n";
|
|
||||||
|
|
||||||
Field pp(grid);
|
|
||||||
|
|
||||||
for (int k=0;k<=MaxIterations;k++){
|
|
||||||
|
|
||||||
int peri_k = k % mmax;
|
|
||||||
int peri_kp = (k+1) % mmax;
|
|
||||||
|
|
||||||
rtz=rtzp;
|
|
||||||
d= PcgM3(p[peri_k],mmp[peri_k]);
|
|
||||||
a = rtz/d;
|
|
||||||
|
|
||||||
// Memorise this
|
|
||||||
pAp[peri_k] = d;
|
|
||||||
|
|
||||||
|
|
||||||
axpy(x,a,p[peri_k],x);
|
|
||||||
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
|
|
||||||
|
|
||||||
// Compute z = M x
|
|
||||||
PcgM1(r,z);
|
|
||||||
|
|
||||||
{
|
|
||||||
RealD n1,n2;
|
|
||||||
n1=norm2(r);
|
|
||||||
n2=norm2(z);
|
|
||||||
std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : vector r,z "<<n1<<" "<<n2<<"\n";
|
|
||||||
}
|
|
||||||
rtzp =real(innerProduct(r,z));
|
|
||||||
std::cout << GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : inner rtzp "<<rtzp<<"\n";
|
|
||||||
|
|
||||||
// PcgM2(z,p[0]);
|
|
||||||
PcgM2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
|
||||||
|
|
||||||
p[peri_kp]=mu;
|
|
||||||
|
|
||||||
// Standard search direction p -> z + b p ; b =
|
|
||||||
b = (rtzp)/rtz;
|
|
||||||
|
|
||||||
int northog;
|
|
||||||
// k=zero <=> peri_kp=1; northog = 1
|
|
||||||
// k=1 <=> peri_kp=2; northog = 2
|
|
||||||
// ... ... ...
|
|
||||||
// k=mmax-2<=> peri_kp=mmax-1; northog = mmax-1
|
|
||||||
// k=mmax-1<=> peri_kp=0; northog = 1
|
|
||||||
|
|
||||||
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
|
|
||||||
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : orthogonalising to last "<<northog<<" vectors\n";
|
|
||||||
for(int back=0; back < northog; back++){
|
|
||||||
int peri_back = (k-back)%mmax;
|
|
||||||
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
|
|
||||||
RealD beta = -pbApk/pAp[peri_back];
|
|
||||||
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
|
|
||||||
}
|
|
||||||
|
|
||||||
RealD rrn=sqrt(rn/ssq);
|
|
||||||
RealD rtn=sqrt(rtz/ssq);
|
|
||||||
RealD rtnp=sqrt(rtzp/ssq);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"HDCG: fPcg k= "<<k<<" residual = "<<rrn<<"\n";
|
|
||||||
|
|
||||||
// Stopping condition
|
|
||||||
if ( rn <= rsq ) {
|
|
||||||
|
|
||||||
HDCGTimer.Stop();
|
|
||||||
std::cout<<GridLogMessage<<"HDCG: fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
|
|
||||||
|
|
||||||
_FineLinop.HermOp(x,mmp[0]);
|
|
||||||
axpy(tmp,-1.0,src,mmp[0]);
|
|
||||||
|
|
||||||
RealD mmpnorm = sqrt(norm2(mmp[0]));
|
|
||||||
RealD xnorm = sqrt(norm2(x));
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
|
||||||
RealD tmpnorm = sqrt(norm2(tmp));
|
|
||||||
RealD true_residual = tmpnorm/srcnorm;
|
|
||||||
std::cout<<GridLogMessage
|
|
||||||
<<"HDCG: true residual is "<<true_residual
|
<<"HDCG: true residual is "<<true_residual
|
||||||
<<" solution "<<xnorm
|
<<" solution "<<xnorm
|
||||||
<<" source "<<srcnorm
|
<<" source "<<srcnorm
|
||||||
<<" mmp "<<mmpnorm
|
<<" mmp "<<mmpnorm
|
||||||
<<std::endl;
|
<<std::endl;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
HDCGTimer.Stop();
|
||||||
|
std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl;
|
||||||
|
RealD xnorm = sqrt(norm2(x));
|
||||||
|
RealD srcnorm = sqrt(norm2(src));
|
||||||
|
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
virtual void operator() (std::vector<Field> &src, std::vector<Field> &x)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage<<"HDCG: mrhs fPcg starting"<<std::endl;
|
||||||
|
src[0].Grid()->Barrier();
|
||||||
|
int nrhs = src.size();
|
||||||
|
std::vector<RealD> f(nrhs);
|
||||||
|
std::vector<RealD> rtzp(nrhs);
|
||||||
|
std::vector<RealD> rtz(nrhs);
|
||||||
|
std::vector<RealD> a(nrhs);
|
||||||
|
std::vector<RealD> d(nrhs);
|
||||||
|
std::vector<RealD> b(nrhs);
|
||||||
|
std::vector<RealD> rptzp(nrhs);
|
||||||
|
/////////////////////////////
|
||||||
|
// Set up history vectors
|
||||||
|
/////////////////////////////
|
||||||
|
int mmax = 2;
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocating"<<std::endl;
|
||||||
|
src[0].Grid()->Barrier();
|
||||||
|
std::vector<std::vector<Field> > p(nrhs); for(int r=0;r<nrhs;r++) p[r].resize(mmax,grid);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocated p"<<std::endl;
|
||||||
|
src[0].Grid()->Barrier();
|
||||||
|
std::vector<std::vector<Field> > mmp(nrhs); for(int r=0;r<nrhs;r++) mmp[r].resize(mmax,grid);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocated mmp"<<std::endl;
|
||||||
|
src[0].Grid()->Barrier();
|
||||||
|
std::vector<std::vector<RealD> > pAp(nrhs); for(int r=0;r<nrhs;r++) pAp[r].resize(mmax);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocated pAp"<<std::endl;
|
||||||
|
src[0].Grid()->Barrier();
|
||||||
|
std::vector<Field> z(nrhs,grid);
|
||||||
|
std::vector<Field> mp (nrhs,grid);
|
||||||
|
std::vector<Field> r (nrhs,grid);
|
||||||
|
std::vector<Field> mu (nrhs,grid);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg allocated z,mp,r,mu"<<std::endl;
|
||||||
|
src[0].Grid()->Barrier();
|
||||||
|
|
||||||
|
//Initial residual computation & set up
|
||||||
|
std::vector<RealD> src_nrm(nrhs);
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
||||||
|
src_nrm[rhs]=norm2(src[rhs]);
|
||||||
|
assert(src_nrm[rhs]!=0.0);
|
||||||
|
}
|
||||||
|
std::vector<RealD> tn(nrhs);
|
||||||
|
|
||||||
|
GridStopWatch HDCGTimer;
|
||||||
|
HDCGTimer.Start();
|
||||||
|
//////////////////////////
|
||||||
|
// x0 = Vstart -- possibly modify guess
|
||||||
|
//////////////////////////
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++){
|
||||||
|
Vstart(x[rhs],src[rhs]);
|
||||||
|
|
||||||
|
// r0 = b -A x0
|
||||||
|
_FineLinop.HermOp(x[rhs],mmp[rhs][0]);
|
||||||
|
axpy (r[rhs], -1.0,mmp[rhs][0], src[rhs]); // Recomputes r=src-Ax0
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
//////////////////////////////////
|
||||||
HDCGTimer.Stop();
|
// Compute z = M1 x
|
||||||
std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl;
|
//////////////////////////////////
|
||||||
RealD xnorm = sqrt(norm2(x));
|
// This needs a multiRHS version for acceleration
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
PcgM1(r,z);
|
||||||
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
|
|
||||||
#endif
|
std::vector<RealD> ssq(nrhs);
|
||||||
|
std::vector<RealD> rsq(nrhs);
|
||||||
|
std::vector<Field> pp(nrhs,grid);
|
||||||
|
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++){
|
||||||
|
rtzp[rhs] =real(innerProduct(r[rhs],z[rhs]));
|
||||||
|
p[rhs][0]=z[rhs];
|
||||||
|
ssq[rhs]=norm2(src[rhs]);
|
||||||
|
rsq[rhs]= ssq[rhs]*Tolerance*Tolerance;
|
||||||
|
std::cout << GridLogMessage<<"mrhs HDCG: "<<rhs<<" k=0 residual "<<rtzp[rhs]<<" rsq "<<rsq[rhs]<<"\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<RealD> rn(nrhs);
|
||||||
|
for (int k=0;k<=MaxIterations;k++){
|
||||||
|
|
||||||
|
int peri_k = k % mmax;
|
||||||
|
int peri_kp = (k+1) % mmax;
|
||||||
|
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++){
|
||||||
|
rtz[rhs]=rtzp[rhs];
|
||||||
|
d[rhs]= PcgM3(p[rhs][peri_k],mmp[rhs][peri_k]);
|
||||||
|
a[rhs] = rtz[rhs]/d[rhs];
|
||||||
|
|
||||||
|
// Memorise this
|
||||||
|
pAp[rhs][peri_k] = d[rhs];
|
||||||
|
|
||||||
|
axpy(x[rhs],a[rhs],p[rhs][peri_k],x[rhs]);
|
||||||
|
rn[rhs] = axpy_norm(r[rhs],-a[rhs],mmp[rhs][peri_k],r[rhs]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute z = M x (for *all* RHS)
|
||||||
|
PcgM1(r,z);
|
||||||
|
|
||||||
|
RealD max_rn=0.0;
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++){
|
||||||
|
|
||||||
|
rtzp[rhs] =real(innerProduct(r[rhs],z[rhs]));
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<<"HDCG::fPcg rhs"<<rhs<<" iteration "<<k<<" : inner rtzp "<<rtzp[rhs]<<"\n";
|
||||||
|
|
||||||
|
mu[rhs]=z[rhs];
|
||||||
|
|
||||||
|
p[rhs][peri_kp]=mu[rhs];
|
||||||
|
|
||||||
|
// Standard search direction p == z + b p
|
||||||
|
b[rhs] = (rtzp[rhs])/rtz[rhs];
|
||||||
|
|
||||||
|
int northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
||||||
|
std::cout<<GridLogMessage<<"HDCG::fPcg iteration "<<k<<" : orthogonalising to last "<<northog<<" vectors\n";
|
||||||
|
for(int back=0; back < northog; back++){
|
||||||
|
int peri_back = (k-back)%mmax;
|
||||||
|
RealD pbApk= real(innerProduct(mmp[rhs][peri_back],p[rhs][peri_kp]));
|
||||||
|
RealD beta = -pbApk/pAp[rhs][peri_back];
|
||||||
|
axpy(p[rhs][peri_kp],beta,p[rhs][peri_back],p[rhs][peri_kp]);
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD rrn=sqrt(rn[rhs]/ssq[rhs]);
|
||||||
|
RealD rtn=sqrt(rtz[rhs]/ssq[rhs]);
|
||||||
|
RealD rtnp=sqrt(rtzp[rhs]/ssq[rhs]);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"HDCG: rhs "<<rhs<<"fPcg k= "<<k<<" residual = "<<rrn<<"\n";
|
||||||
|
if ( rrn > max_rn ) max_rn = rrn;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stopping condition based on worst case
|
||||||
|
if ( max_rn <= Tolerance ) {
|
||||||
|
|
||||||
|
HDCGTimer.Stop();
|
||||||
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
|
||||||
|
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++){
|
||||||
|
_FineLinop.HermOp(x[rhs],mmp[rhs][0]);
|
||||||
|
Field tmp(grid);
|
||||||
|
axpy(tmp,-1.0,src[rhs],mmp[rhs][0]);
|
||||||
|
|
||||||
|
RealD mmpnorm = sqrt(norm2(mmp[rhs][0]));
|
||||||
|
RealD xnorm = sqrt(norm2(x[rhs]));
|
||||||
|
RealD srcnorm = sqrt(norm2(src[rhs]));
|
||||||
|
RealD tmpnorm = sqrt(norm2(tmp));
|
||||||
|
RealD true_residual = tmpnorm/srcnorm;
|
||||||
|
std::cout<<GridLogMessage
|
||||||
|
<<"HDCG: true residual ["<<rhs<<"] is "<<true_residual
|
||||||
|
<<" solution "<<xnorm
|
||||||
|
<<" source "<<srcnorm
|
||||||
|
<<" mmp "<<mmpnorm
|
||||||
|
<<std::endl;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
HDCGTimer.Stop();
|
||||||
|
std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl;
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++){
|
||||||
|
RealD xnorm = sqrt(norm2(x[rhs]));
|
||||||
|
RealD srcnorm = sqrt(norm2(src[rhs]));
|
||||||
|
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
virtual void PcgM1(std::vector<Field> & in,std::vector<Field> & out)
|
||||||
|
{
|
||||||
|
std::cout << "PcgM1 default (cheat) mrhs versoin"<<std::endl;
|
||||||
|
for(int rhs=0;rhs<in.size();rhs++){
|
||||||
|
this->PcgM1(in[rhs],out[rhs]);
|
||||||
|
}
|
||||||
|
}
|
||||||
virtual void PcgM1(Field & in, Field & out) =0;
|
virtual void PcgM1(Field & in, Field & out) =0;
|
||||||
virtual void Vstart(Field & x,const Field & src)=0;
|
virtual void Vstart(Field & x,const Field & src)=0;
|
||||||
|
|
||||||
@ -440,6 +500,7 @@ class TwoLevelADEF2 : public TwoLevelCG<Field>
|
|||||||
|
|
||||||
virtual void Vstart(Field & x,const Field & src)
|
virtual void Vstart(Field & x,const Field & src)
|
||||||
{
|
{
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg Vstart "<<std::endl;
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
// Choose x_0 such that
|
// Choose x_0 such that
|
||||||
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
|
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
|
||||||
@ -456,14 +517,97 @@ class TwoLevelADEF2 : public TwoLevelCG<Field>
|
|||||||
CoarseField PleftProj(this->coarsegrid);
|
CoarseField PleftProj(this->coarsegrid);
|
||||||
CoarseField PleftMss_proj(this->coarsegrid);
|
CoarseField PleftMss_proj(this->coarsegrid);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg Vstart projecting "<<std::endl;
|
||||||
this->_Aggregates.ProjectToSubspace(PleftProj,src);
|
this->_Aggregates.ProjectToSubspace(PleftProj,src);
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg Vstart coarse solve "<<std::endl;
|
||||||
this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
this->_CoarseSolverPrecise(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||||
|
std::cout << GridLogMessage<<"HDCG: fPcg Vstart promote "<<std::endl;
|
||||||
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);
|
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,x);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<class Field, class CoarseField, class Aggregation>
|
||||||
|
class TwoLevelADEF2mrhs : public TwoLevelADEF2<Field,CoarseField,Aggregation>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
GridBase *coarsegridmrhs;
|
||||||
|
LinearFunction<CoarseField> &_CoarseSolverMrhs;
|
||||||
|
LinearFunction<CoarseField> &_CoarseGuesser;
|
||||||
|
TwoLevelADEF2mrhs(RealD tol,
|
||||||
|
Integer maxit,
|
||||||
|
LinearOperatorBase<Field> &FineLinop,
|
||||||
|
LinearFunction<Field> &Smoother,
|
||||||
|
LinearFunction<CoarseField> &CoarseSolver,
|
||||||
|
LinearFunction<CoarseField> &CoarseSolverPrecise,
|
||||||
|
LinearFunction<CoarseField> &CoarseSolverMrhs,
|
||||||
|
LinearFunction<CoarseField> &CoarseGuesser,
|
||||||
|
GridBase *rhsgrid,
|
||||||
|
Aggregation &Aggregates) :
|
||||||
|
TwoLevelADEF2<Field,CoarseField,Aggregation>(tol, maxit,FineLinop,Smoother,CoarseSolver,CoarseSolverPrecise,Aggregates),
|
||||||
|
_CoarseSolverMrhs(CoarseSolverMrhs),
|
||||||
|
_CoarseGuesser(CoarseGuesser)
|
||||||
|
{
|
||||||
|
coarsegridmrhs = rhsgrid;
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual void PcgM1(std::vector<Field> & in,std::vector<Field> & out){
|
||||||
|
|
||||||
|
int nrhs=in.size();
|
||||||
|
std::cout << " mrhs PcgM1 for "<<nrhs<<" right hand sides"<<std::endl;
|
||||||
|
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
||||||
|
Field tmp(this->grid);
|
||||||
|
std::vector<Field> Min(nrhs,this->grid);
|
||||||
|
CoarseField PleftProj(this->coarsegrid);
|
||||||
|
CoarseField PleftMss_proj(this->coarsegrid);
|
||||||
|
|
||||||
|
CoarseField PleftProjMrhs(this->coarsegridmrhs);
|
||||||
|
CoarseField PleftMss_projMrhs(this->coarsegridmrhs);
|
||||||
|
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " Calling smoother for "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
this->_Smoother(in[rhs],Min[rhs]);
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " smoother done "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
this->_FineLinop.HermOp(Min[rhs],out[rhs]);
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " Hermop for "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
axpy(tmp,-1.0,out[rhs],in[rhs]); // tmp = in - A Min
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " axpy "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
this->_Aggregates.ProjectToSubspace(PleftProj,tmp); // can optimise later
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " project "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
InsertSlice(PleftProj,PleftProjMrhs,rhs,0);
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " insert rhs "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
this->_CoarseGuesser(PleftProj,PleftMss_proj);
|
||||||
|
this->grid->Barrier();
|
||||||
|
std::cout << " insert guess "<<rhs<<std::endl;
|
||||||
|
this->grid->Barrier();
|
||||||
|
InsertSlice(PleftMss_proj,PleftMss_projMrhs,rhs,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << " Coarse solve "<<std::endl;
|
||||||
|
this->_CoarseSolverMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} [in - A Min]_s
|
||||||
|
|
||||||
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
||||||
|
ExtractSlice(PleftMss_proj,PleftMss_projMrhs,rhs,0);
|
||||||
|
this->_Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||||
|
axpy(out[rhs],1.0,Min[rhs],tmp); // Min+tmp
|
||||||
|
}
|
||||||
|
std::cout << " Extracted "<<std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<class Field>
|
template<class Field>
|
||||||
class TwoLevelADEF1defl : public TwoLevelCG<Field>
|
class TwoLevelADEF1defl : public TwoLevelCG<Field>
|
||||||
{
|
{
|
||||||
|
@ -243,9 +243,14 @@ int main (int argc, char ** argv)
|
|||||||
Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1});
|
Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1});
|
||||||
|
|
||||||
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
|
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
|
||||||
|
|
||||||
|
|
||||||
MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
|
MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
|
||||||
|
typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t;
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Test against single RHS
|
||||||
|
//////////////////////////////////////////
|
||||||
{
|
{
|
||||||
GridParallelRNG rh_CRNG(CoarseMrhs);rh_CRNG.SeedFixedIntegers(cseeds);
|
GridParallelRNG rh_CRNG(CoarseMrhs);rh_CRNG.SeedFixedIntegers(cseeds);
|
||||||
CoarseVector rh_phi(CoarseMrhs);
|
CoarseVector rh_phi(CoarseMrhs);
|
||||||
@ -287,7 +292,22 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << nrhs<< " srhs " << t1/ncall/nrhs <<" us"<<std::endl;
|
std::cout << nrhs<< " srhs " << t1/ncall/nrhs <<" us"<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Test against single RHS
|
||||||
|
//////////////////////////////////////////
|
||||||
|
{
|
||||||
|
typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> HermMatrix;
|
||||||
|
HermMatrix MrhsCoarseOp (mrhs);
|
||||||
|
|
||||||
|
GridParallelRNG rh_CRNG(CoarseMrhs);rh_CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
ConjugateGradient<CoarseVector> mrhsCG(1.0e-8,2000,true);
|
||||||
|
CoarseVector rh_res(CoarseMrhs);
|
||||||
|
CoarseVector rh_src(CoarseMrhs);
|
||||||
|
random(rh_CRNG,rh_src);
|
||||||
|
rh_res= Zero();
|
||||||
|
mrhsCG(MrhsCoarseOp,rh_src,rh_res);
|
||||||
|
}
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<std::endl;
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
std::cout<<GridLogMessage<<std::endl;
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
|
||||||
|
739
tests/debug/Test_general_coarse_hdcg_phys48.cc
Normal file
739
tests/debug/Test_general_coarse_hdcg_phys48.cc
Normal file
@ -0,0 +1,739 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_general_coarse_hdcg.cc
|
||||||
|
|
||||||
|
Copyright (C) 2023
|
||||||
|
|
||||||
|
Author: Peter Boyle <pboyle@bnl.gov>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
#include <Grid/lattice/PaddedCell.h>
|
||||||
|
#include <Grid/stencil/GeneralLocalStencil.h>
|
||||||
|
//#include <Grid/algorithms/GeneralCoarsenedMatrix.h>
|
||||||
|
#include <Grid/algorithms/iterative/AdefGeneric.h>
|
||||||
|
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
template<class Coarsened>
|
||||||
|
void SaveOperator(Coarsened &Operator,std::string file)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
emptyUserRecord record;
|
||||||
|
ScidacWriter WR(Operator.Grid()->IsBoss());
|
||||||
|
assert(Operator._A.size()==Operator.geom.npoint);
|
||||||
|
WR.open(file);
|
||||||
|
for(int p=0;p<Operator._A.size();p++){
|
||||||
|
auto tmp = Operator.Cell.Extract(Operator._A[p]);
|
||||||
|
WR.writeScidacFieldRecord(tmp,record,0,0);
|
||||||
|
// WR.writeScidacFieldRecord(tmp,record,0,BINARYIO_LEXICOGRAPHIC);
|
||||||
|
}
|
||||||
|
WR.close();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
template<class Coarsened>
|
||||||
|
void LoadOperator(Coarsened &Operator,std::string file)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
emptyUserRecord record;
|
||||||
|
Grid::ScidacReader RD ;
|
||||||
|
RD.open(file);
|
||||||
|
assert(Operator._A.size()==Operator.geom.npoint);
|
||||||
|
for(int p=0;p<Operator.geom.npoint;p++){
|
||||||
|
conformable(Operator._A[p].Grid(),Operator.CoarseGrid());
|
||||||
|
// RD.readScidacFieldRecord(Operator._A[p],record,BINARYIO_LEXICOGRAPHIC);
|
||||||
|
RD.readScidacFieldRecord(Operator._A[p],record,0);
|
||||||
|
}
|
||||||
|
RD.close();
|
||||||
|
Operator.ExchangeCoarseLinks();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
template<class Coarsened>
|
||||||
|
void ReLoadOperator(Coarsened &Operator,std::string file)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
emptyUserRecord record;
|
||||||
|
Grid::ScidacReader RD ;
|
||||||
|
RD.open(file);
|
||||||
|
assert(Operator._A.size()==Operator.geom.npoint);
|
||||||
|
for(int p=0;p<Operator.geom.npoint;p++){
|
||||||
|
auto tmp=Operator.Cell.Extract(Operator._A[p]);
|
||||||
|
RD.readScidacFieldRecord(tmp,record,0);
|
||||||
|
Operator._A[p] = Operator.Cell.ExchangePeriodic(tmp);
|
||||||
|
}
|
||||||
|
RD.close();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
template<class aggregation>
|
||||||
|
void SaveBasis(aggregation &Agg,std::string file)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
emptyUserRecord record;
|
||||||
|
ScidacWriter WR(Agg.FineGrid->IsBoss());
|
||||||
|
WR.open(file);
|
||||||
|
for(int b=0;b<Agg.subspace.size();b++){
|
||||||
|
//WR.writeScidacFieldRecord(Agg.subspace[b],record,0,BINARYIO_LEXICOGRAPHIC);
|
||||||
|
WR.writeScidacFieldRecord(Agg.subspace[b],record,0,0);
|
||||||
|
}
|
||||||
|
WR.close();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
template<class aggregation>
|
||||||
|
void LoadBasis(aggregation &Agg, std::string file)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_LIME
|
||||||
|
emptyUserRecord record;
|
||||||
|
ScidacReader RD ;
|
||||||
|
RD.open(file);
|
||||||
|
for(int b=0;b<Agg.subspace.size();b++){
|
||||||
|
// RD.readScidacFieldRecord(Agg.subspace[b],record,BINARYIO_LEXICOGRAPHIC);
|
||||||
|
RD.readScidacFieldRecord(Agg.subspace[b],record,0);
|
||||||
|
}
|
||||||
|
RD.close();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
RealD InverseApproximation(RealD x){
|
||||||
|
return 1.0/x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Want Op in CoarsenOp to call MatPcDagMatPc
|
||||||
|
template<class Field>
|
||||||
|
class HermOpAdaptor : public LinearOperatorBase<Field>
|
||||||
|
{
|
||||||
|
LinearOperatorBase<Field> & wrapped;
|
||||||
|
public:
|
||||||
|
HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme) {};
|
||||||
|
void Op (const Field &in, Field &out) { wrapped.HermOp(in,out); }
|
||||||
|
void HermOp(const Field &in, Field &out) { wrapped.HermOp(in,out); }
|
||||||
|
void AdjOp (const Field &in, Field &out){ wrapped.HermOp(in,out); }
|
||||||
|
void OpDiag (const Field &in, Field &out) { assert(0); }
|
||||||
|
void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); }
|
||||||
|
void OpDirAll (const Field &in, std::vector<Field> &out) { assert(0); };
|
||||||
|
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
|
||||||
|
};
|
||||||
|
template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using LinearFunction<Field>::operator();
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
Chebyshev<Field> Cheby;
|
||||||
|
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
Cheby(_lo,_hi,_ord,InverseApproximation)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl;
|
||||||
|
};
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
Field tmp(in.Grid());
|
||||||
|
tmp = in;
|
||||||
|
Cheby(_SmootherOperator,tmp,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Field> class CGSmoother : public LinearFunction<Field>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using LinearFunction<Field>::operator();
|
||||||
|
typedef LinearOperatorBase<Field> FineOperator;
|
||||||
|
FineOperator & _SmootherOperator;
|
||||||
|
int iters;
|
||||||
|
CGSmoother(int _iters, FineOperator &SmootherOperator) :
|
||||||
|
_SmootherOperator(SmootherOperator),
|
||||||
|
iters(_iters)
|
||||||
|
{
|
||||||
|
std::cout << GridLogMessage<<" Mirs smoother order "<<iters<<std::endl;
|
||||||
|
};
|
||||||
|
void operator() (const Field &in, Field &out)
|
||||||
|
{
|
||||||
|
ConjugateGradient<Field> CG(0.0,iters,false); // non-converge is just fine in a smoother
|
||||||
|
CG(_SmootherOperator,in,out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
gridblasHandle_t GridBLAS::gridblasHandle;
|
||||||
|
int GridBLAS::gridblasInit;
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
const int Ls=24;
|
||||||
|
// const int nbasis = 62;
|
||||||
|
// const int nbasis = 56;
|
||||||
|
const int nbasis = 36;
|
||||||
|
const int cb = 0 ;
|
||||||
|
RealD mass=0.00078;
|
||||||
|
RealD M5=1.8;
|
||||||
|
RealD b=1.5;
|
||||||
|
RealD c=0.5;
|
||||||
|
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
// Construct a coarsened grid with 4^4 cell
|
||||||
|
Coordinate Block({4,4,6,4});
|
||||||
|
Coordinate clatt = GridDefaultLatt();
|
||||||
|
for(int d=0;d<clatt.size();d++){
|
||||||
|
clatt[d] = clatt[d]/Block[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt,
|
||||||
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());;
|
||||||
|
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
|
||||||
|
|
||||||
|
///////////////////////// RNGs /////////////////////////////////
|
||||||
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
|
std::vector<int> seeds5({5,6,7,8});
|
||||||
|
std::vector<int> cseeds({5,6,7,8});
|
||||||
|
|
||||||
|
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
||||||
|
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
|
||||||
|
|
||||||
|
///////////////////////// Configuration /////////////////////////////////
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
FieldMetaData header;
|
||||||
|
std::string file("ckpoint_lat.1000");
|
||||||
|
NerscIO::readConfiguration(Umu,header,file);
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
//////////////////////// Fermion action //////////////////////////////////
|
||||||
|
MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
|
||||||
|
|
||||||
|
SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);
|
||||||
|
|
||||||
|
typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix;
|
||||||
|
HermFineMatrix FineHermOp(HermOpEO);
|
||||||
|
|
||||||
|
LatticeFermion result(FrbGrid); result=Zero();
|
||||||
|
|
||||||
|
LatticeFermion src(FrbGrid); random(RNG5,src);
|
||||||
|
|
||||||
|
// Run power method on FineHermOp
|
||||||
|
PowerMethod<LatticeFermion> PM; PM(HermOpEO,src);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
///////////// Coarse basis and Little Dirac Operator ///////
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
|
||||||
|
typedef LittleDiracOperator::CoarseVector CoarseVector;
|
||||||
|
|
||||||
|
NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d);
|
||||||
|
NearestStencilGeometry5D geom_nn(Coarse5d);
|
||||||
|
|
||||||
|
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
|
||||||
|
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
|
||||||
|
Subspace Aggregates(Coarse5d,FrbGrid,cb);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Need to check about red-black grid coarsening
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
|
||||||
|
|
||||||
|
std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.rat.scidac.62");
|
||||||
|
std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.rat.scidac.62");
|
||||||
|
std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.rat.scidac.62");
|
||||||
|
bool load_agg=true;
|
||||||
|
bool load_refine=true;
|
||||||
|
bool load_mat=false;
|
||||||
|
MemoryManager::Print();
|
||||||
|
if ( load_agg ) {
|
||||||
|
LoadBasis(Aggregates,subspace_file);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// NBASIS=40
|
||||||
|
// Best so far: ord 2000 [0.01,95], 500,500 -- 466 iters
|
||||||
|
// slurm-398626.out:Grid : Message : 141.295253 s : 500 filt [1] <n|MdagM|n> 0.000103622063
|
||||||
|
|
||||||
|
|
||||||
|
//Grid : Message : 33.870465 s : Chebyshev subspace pass-1 : ord 2000 [0.001,95]
|
||||||
|
//Grid : Message : 33.870485 s : Chebyshev subspace pass-2 : nbasis40 min 1000 step 1000 lo0
|
||||||
|
//slurm-1482200.out : filt ~ 0.004 -- not as low mode projecting -- took 626 iters
|
||||||
|
|
||||||
|
// To try: 2000 [0.1,95] ,2000,500,500 -- slurm-1482213.out 586 iterations
|
||||||
|
|
||||||
|
// To try: 2000 [0.01,95] ,2000,500,500 -- 469 (think I bumped 92 to 95) (??)
|
||||||
|
// To try: 2000 [0.025,95],2000,500,500
|
||||||
|
// To try: 2000 [0.005,95],2000,500,500
|
||||||
|
|
||||||
|
// NBASIS=44 -- HDCG paper was 64 vectors; AMD compiler craps out at 48
|
||||||
|
// To try: 2000 [0.01,95] ,2000,500,500 -- 419 lowest slurm-1482355.out
|
||||||
|
// To try: 2000 [0.025,95] ,2000,500,500 -- 487
|
||||||
|
// To try: 2000 [0.005,95] ,2000,500,500
|
||||||
|
/*
|
||||||
|
Smoother [3,92] order 16
|
||||||
|
slurm-1482355.out:Grid : Message : 35.239686 s : Chebyshev subspace pass-1 : ord 2000 [0.01,95]
|
||||||
|
slurm-1482355.out:Grid : Message : 35.239714 s : Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0
|
||||||
|
slurm-1482355.out:Grid : Message : 5561.305552 s : HDCG: Pcg converged in 419 iterations and 2616.202598 s
|
||||||
|
|
||||||
|
slurm-1482367.out:Grid : Message : 43.157235 s : Chebyshev subspace pass-1 : ord 2000 [0.025,95]
|
||||||
|
slurm-1482367.out:Grid : Message : 43.157257 s : Chebyshev subspace pass-2 : nbasis44 min 500 step 500 lo0
|
||||||
|
slurm-1482367.out:Grid : Message : 6169.469330 s : HDCG: Pcg converged in 487 iterations and 3131.185821 s
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
||||||
|
95.0,0.0075,
|
||||||
|
2500,
|
||||||
|
500,
|
||||||
|
500,
|
||||||
|
0.0);
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Aggregates.CreateSubspaceChebyshevPowerLaw(RNG5,HermOpEO,nbasis,
|
||||||
|
95.0,
|
||||||
|
2000);
|
||||||
|
*/
|
||||||
|
|
||||||
|
Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
|
||||||
|
0.0003,1.0e-5,2000); // Lo, tol, maxit
|
||||||
|
/*
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
||||||
|
95.0,0.05,
|
||||||
|
2000,
|
||||||
|
500,
|
||||||
|
500,
|
||||||
|
0.0);
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
|
||||||
|
95.0,0.01,
|
||||||
|
2000,
|
||||||
|
500,
|
||||||
|
500,
|
||||||
|
0.0);
|
||||||
|
*/
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); -- running slurm-1484934.out nbasis 56
|
||||||
|
|
||||||
|
// Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500); <== last run
|
||||||
|
SaveBasis(Aggregates,subspace_file);
|
||||||
|
}
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
int refine=1;
|
||||||
|
if(refine){
|
||||||
|
if ( load_refine ) {
|
||||||
|
LoadBasis(Aggregates,refine_file);
|
||||||
|
} else {
|
||||||
|
// HDCG used Pcg to refine
|
||||||
|
Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000);
|
||||||
|
SaveBasis(Aggregates,refine_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MemoryManager::Print();
|
||||||
|
Aggregates.Orthogonalise();
|
||||||
|
if ( load_mat ) {
|
||||||
|
LoadOperator(LittleDiracOp,ldop_file);
|
||||||
|
} else {
|
||||||
|
LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
|
||||||
|
SaveOperator(LittleDiracOp,ldop_file);
|
||||||
|
}
|
||||||
|
|
||||||
|
// I/O test:
|
||||||
|
CoarseVector c_src(Coarse5d); random(CRNG,c_src);
|
||||||
|
CoarseVector c_res(Coarse5d);
|
||||||
|
CoarseVector c_ref(Coarse5d);
|
||||||
|
|
||||||
|
// Try projecting to one hop only
|
||||||
|
// LittleDiracOp.ShiftMatrix(1.0e-4);
|
||||||
|
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
|
||||||
|
LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
|
||||||
|
|
||||||
|
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
|
||||||
|
HermMatrix CoarseOp (LittleDiracOp);
|
||||||
|
HermMatrix CoarseOpProj (LittleDiracOpProj);
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Build a coarse lanczos
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Chebyshev<CoarseVector> IRLCheby(0.012,40.0,201); //500 HDCG iters
|
||||||
|
// int Nk=512; // Didn't save much
|
||||||
|
// int Nm=640;
|
||||||
|
// int Nstop=400;
|
||||||
|
|
||||||
|
// Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
|
||||||
|
// int Nk=128;
|
||||||
|
// int Nm=160;
|
||||||
|
|
||||||
|
// Chebyshev<CoarseVector> IRLCheby(0.005,40.0,201); //319 HDCG iters @ 128//160 nk.
|
||||||
|
Chebyshev<CoarseVector> IRLCheby(0.04,40.0,201); //319 HDCG iters @ 128//160 nk.
|
||||||
|
int Nk=192;
|
||||||
|
int Nm=256;
|
||||||
|
int Nstop=Nk;
|
||||||
|
|
||||||
|
// Chebyshev<CoarseVector> IRLCheby(0.010,45.0,201); // 1 iter
|
||||||
|
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp);
|
||||||
|
PlainHermOp<CoarseVector> IRLOp (CoarseOp);
|
||||||
|
|
||||||
|
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1e-5,10);
|
||||||
|
|
||||||
|
int Nconv;
|
||||||
|
std::vector<RealD> eval(Nm);
|
||||||
|
std::vector<CoarseVector> evec(Nm,Coarse5d);
|
||||||
|
|
||||||
|
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
|
||||||
|
|
||||||
|
IRL.calc(eval,evec,c_src,Nconv);
|
||||||
|
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Build a coarse space solver
|
||||||
|
//////////////////////////////////////////
|
||||||
|
int maxit=30000;
|
||||||
|
ConjugateGradient<CoarseVector> CG(1.0e-8,maxit,false);
|
||||||
|
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,30000,false);
|
||||||
|
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
|
||||||
|
|
||||||
|
|
||||||
|
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
|
||||||
|
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
|
||||||
|
c_res=Zero();
|
||||||
|
// HPDSolve(c_src,c_res); c_ref = c_res;
|
||||||
|
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
||||||
|
// std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl;
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Deflated (with real op EV's) solve for the projected coarse op
|
||||||
|
// Work towards ADEF1 in the coarse space
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
|
||||||
|
c_res=Zero();
|
||||||
|
// HPDSolveProj(c_src,c_res);
|
||||||
|
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
||||||
|
// std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl;
|
||||||
|
// c_res = c_res - c_ref;
|
||||||
|
// std::cout << "Projected solver error "<<norm2(c_res)<<std::endl;
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Coarse ADEF1 with deflation space
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
ChebyshevSmoother<CoarseVector > CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
|
||||||
|
// CoarseSmoother(0.1,37.,8,CoarseOpProj); //
|
||||||
|
// CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s
|
||||||
|
// CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s
|
||||||
|
// CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter
|
||||||
|
// CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter
|
||||||
|
// ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj); // 311
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// CG, Cheby mode spacing 200,200
|
||||||
|
// Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
|
||||||
|
// Unprojected Coarse CG solve to 4e-2 : 33 iters, 0.8s
|
||||||
|
// Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs
|
||||||
|
////////////////////////////////////////////////////////
|
||||||
|
// ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s 2.1x gain
|
||||||
|
// ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
|
||||||
|
// HDCG 38 iters 162s
|
||||||
|
//
|
||||||
|
// CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs
|
||||||
|
// ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s 2.1x gain
|
||||||
|
// ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
|
||||||
|
// HDCG 38 iters 169s
|
||||||
|
|
||||||
|
/*
|
||||||
|
TwoLevelADEF1defl<CoarseVector>
|
||||||
|
cADEF1(1.0e-8, 500,
|
||||||
|
CoarseOp,
|
||||||
|
CoarseSmoother,
|
||||||
|
evec,eval);
|
||||||
|
*/
|
||||||
|
// c_res=Zero();
|
||||||
|
// cADEF1(c_src,c_res);
|
||||||
|
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
||||||
|
// std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
|
||||||
|
// c_res = c_res - c_ref;
|
||||||
|
// std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
|
||||||
|
|
||||||
|
// cADEF1.Tolerance = 4.0e-2;
|
||||||
|
// cADEF1.Tolerance = 1.0e-1;
|
||||||
|
// cADEF1.Tolerance = 5.0e-2;
|
||||||
|
// c_res=Zero();
|
||||||
|
// cADEF1(c_src,c_res);
|
||||||
|
// std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
|
||||||
|
// std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
|
||||||
|
// c_res = c_res - c_ref;
|
||||||
|
// std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Build a smoother
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499
|
||||||
|
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp); //383
|
||||||
|
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp); //328
|
||||||
|
// std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1
|
||||||
|
// std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations
|
||||||
|
// std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters
|
||||||
|
// std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters
|
||||||
|
|
||||||
|
//
|
||||||
|
// Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40
|
||||||
|
// Need to measure cost of coarse space.
|
||||||
|
//
|
||||||
|
// -- i) Reduce coarse residual -- 0.04
|
||||||
|
// -- ii) Lanczos on coarse space -- done
|
||||||
|
// -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and
|
||||||
|
// use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
//
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
//////////////////////////////////////
|
||||||
|
// mrhs coarse solve
|
||||||
|
// Create a higher dim coarse grid
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
ConjugateGradient<CoarseVector> coarseCG(2.0e-2,20000,true);
|
||||||
|
|
||||||
|
const int nrhs=vComplex::Nsimd();
|
||||||
|
|
||||||
|
Coordinate mpi=GridDefaultMpi();
|
||||||
|
Coordinate rhMpi ({1,1,mpi[0],mpi[1],mpi[2],mpi[3]});
|
||||||
|
Coordinate rhLatt({nrhs,1,clatt[0],clatt[1],clatt[2],clatt[3]});
|
||||||
|
Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1});
|
||||||
|
|
||||||
|
GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
|
||||||
|
MultiGeneralCoarsenedMatrix mrhs(LittleDiracOp,CoarseMrhs);
|
||||||
|
typedef decltype(mrhs) MultiGeneralCoarsenedMatrix_t;
|
||||||
|
typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
|
||||||
|
MrhsHermMatrix MrhsCoarseOp (mrhs);
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
{
|
||||||
|
CoarseVector rh_res(CoarseMrhs);
|
||||||
|
CoarseVector rh_guess(CoarseMrhs);
|
||||||
|
CoarseVector rh_src(CoarseMrhs);
|
||||||
|
|
||||||
|
rh_res= Zero();
|
||||||
|
rh_guess= Zero();
|
||||||
|
for(int r=0;r<nrhs;r++){
|
||||||
|
random(CRNG,c_src);
|
||||||
|
DeflCoarseGuesser(c_src,c_res);
|
||||||
|
InsertSlice(c_res,rh_res,r,0);
|
||||||
|
InsertSlice(c_res,rh_guess,r,0);
|
||||||
|
InsertSlice(c_src,rh_src,r,0);
|
||||||
|
}
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
coarseCG(MrhsCoarseOp,rh_src,rh_res);
|
||||||
|
MemoryManager::Print();
|
||||||
|
//redo with block CG
|
||||||
|
|
||||||
|
for(int r=0;r<nrhs;r++){
|
||||||
|
std::cout << " compare to single RHS "<<r<<"/"<<nrhs<<std::endl;
|
||||||
|
ExtractSlice(c_src,rh_src,r,0);
|
||||||
|
ExtractSlice(c_res,rh_res,r,0);
|
||||||
|
ExtractSlice(c_ref,rh_guess,r,0);
|
||||||
|
coarseCG(CoarseOp,c_src,c_ref);
|
||||||
|
std::cout << " mrhs [" <<r <<"] "<< norm2(c_res)<<std::endl;
|
||||||
|
std::cout << " srhs [" <<r <<"] "<< norm2(c_ref)<<std::endl;
|
||||||
|
c_ref=c_ref-c_res;
|
||||||
|
RealD diff =norm2(c_ref)/norm2(c_src);
|
||||||
|
std::cout << r << " diff " << diff<<std::endl;
|
||||||
|
assert(diff < 1.0e-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MemoryManager::Print();
|
||||||
|
//////////////////////////////////////
|
||||||
|
// fine solve
|
||||||
|
//////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
// std::vector<RealD> los({2.0,2.5}); // Nbasis 40 == 36,36 iters
|
||||||
|
std::vector<RealD> los({2.0}); // Nbasis 40 == 36,36 iters
|
||||||
|
|
||||||
|
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
|
||||||
|
// std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
|
||||||
|
std::vector<int> ords({9}); // Nbasis 40 == 40 iters (320 mults)
|
||||||
|
|
||||||
|
/*
|
||||||
|
Smoother opt @56 nbasis, 0.04 convergence, 192 evs
|
||||||
|
ord lo
|
||||||
|
|
||||||
|
16 0.1 no converge -- likely sign indefinite
|
||||||
|
32 0.1 no converge -- likely sign indefinite(?)
|
||||||
|
|
||||||
|
16 0.5 422
|
||||||
|
32 0.5 302
|
||||||
|
|
||||||
|
8 1.0 575
|
||||||
|
12 1.0 449
|
||||||
|
16 1.0 375
|
||||||
|
32 1.0 302
|
||||||
|
|
||||||
|
12 3.0 476
|
||||||
|
16 3.0 319
|
||||||
|
32 3.0 306
|
||||||
|
|
||||||
|
Powerlaw setup 62 vecs
|
||||||
|
slurm-1494943.out:Grid : Message : 4874.186617 s : HDCG: Pcg converged in 171 iterations and 1706.548006 s 1.0 32
|
||||||
|
slurm-1494943.out:Grid : Message : 6490.121648 s : HDCG: Pcg converged in 194 iterations and 1616.219654 s 1.0 16
|
||||||
|
|
||||||
|
Cheby setup: 56vecs
|
||||||
|
-- CG smoother O(16): 487
|
||||||
|
|
||||||
|
Power law setup, 56 vecs -- lambda^-5
|
||||||
|
slurm-1494383.out:Grid : Message : 4377.173265 s : HDCG: Pcg converged in 204 iterations and 1153.548935 s 1.0 32
|
||||||
|
|
||||||
|
Power law setup, 56 vecs -- lambda^-3
|
||||||
|
|
||||||
|
slurm-1494242.out:Grid : Message : 4370.464814 s : HDCG: Pcg converged in 204 iterations and 1143.494776 s 1.0 32
|
||||||
|
slurm-1494242.out:Grid : Message : 5432.414820 s : HDCG: Pcg converged in 237 iterations and 1061.455882 s 1.0 16
|
||||||
|
slurm-1494242.out:Grid : Message : 6588.727977 s : HDCG: Pcg converged in 205 iterations and 1156.565210 s 0.5 32
|
||||||
|
|
||||||
|
Power law setup, 56 vecs -- lambda^-4
|
||||||
|
-- CG smoother O(16): 290
|
||||||
|
-- Cheby smoother O(16): 218 -- getting close to the deflation level I expect 169 from BFM paper @O(7) smoother and 64 nbasis
|
||||||
|
|
||||||
|
Grid : Message : 2790.797194 s : HDCG: Pcg converged in 190 iterations and 1049.563182 s 1.0 32
|
||||||
|
Grid : Message : 3766.374396 s : HDCG: Pcg converged in 218 iterations and 975.455668 s 1.0 16
|
||||||
|
Grid : Message : 4888.746190 s : HDCG: Pcg converged in 191 iterations and 1122.252055 s 0.5 32
|
||||||
|
Grid : Message : 5956.679661 s : HDCG: Pcg converged in 231 iterations and 1067.812850 s 0.5 16
|
||||||
|
|
||||||
|
Grid : Message : 2767.405829 s : HDCG: Pcg converged in 218 iterations and 967.214067 s -- 16
|
||||||
|
Grid : Message : 3816.165905 s : HDCG: Pcg converged in 251 iterations and 1048.636269 s -- 12
|
||||||
|
Grid : Message : 5121.206572 s : HDCG: Pcg converged in 318 iterations and 1304.916168 s -- 8
|
||||||
|
|
||||||
|
|
||||||
|
[paboyle@login2.crusher debug]$ grep -v Memory slurm-402426.out | grep converged | grep HDCG -- [1.0,16] cheby
|
||||||
|
Grid : Message : 5185.521063 s : HDCG: Pcg converged in 377 iterations and 1595.843529 s
|
||||||
|
|
||||||
|
[paboyle@login2.crusher debug]$ grep HDCG slurm-402184.out | grep onver
|
||||||
|
Grid : Message : 3760.438160 s : HDCG: Pcg converged in 422 iterations and 2129.243141 s
|
||||||
|
Grid : Message : 5660.588015 s : HDCG: Pcg converged in 308 iterations and 1900.026821 s
|
||||||
|
|
||||||
|
|
||||||
|
Grid : Message : 4238.206528 s : HDCG: Pcg converged in 575 iterations and 2657.430676 s
|
||||||
|
Grid : Message : 6345.880344 s : HDCG: Pcg converged in 449 iterations and 2108.505208 s
|
||||||
|
|
||||||
|
grep onverg slurm-401663.out | grep HDCG
|
||||||
|
Grid : Message : 3900.817781 s : HDCG: Pcg converged in 476 iterations and 1992.591311 s
|
||||||
|
Grid : Message : 5647.202699 s : HDCG: Pcg converged in 306 iterations and 1746.838660 s
|
||||||
|
|
||||||
|
|
||||||
|
[paboyle@login2.crusher debug]$ grep converged slurm-401775.out | grep HDCG
|
||||||
|
Grid : Message : 3583.177025 s : HDCG: Pcg converged in 375 iterations and 1800.896037 s
|
||||||
|
Grid : Message : 5348.342243 s : HDCG: Pcg converged in 302 iterations and 1765.045018 s
|
||||||
|
|
||||||
|
Conclusion: higher order smoother is doing better. Much better. Use a Krylov smoother instead Mirs as in BFM version.
|
||||||
|
|
||||||
|
*/
|
||||||
|
//
|
||||||
|
MemoryManager::Print();
|
||||||
|
for(int l=0;l<los.size();l++){
|
||||||
|
|
||||||
|
RealD lo = los[l];
|
||||||
|
|
||||||
|
for(int o=0;o<ords.size();o++){
|
||||||
|
|
||||||
|
ConjugateGradient<CoarseVector> CGsloppy(4.0e-2,maxit,false);
|
||||||
|
HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
|
||||||
|
|
||||||
|
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
|
||||||
|
ChebyshevSmoother<LatticeFermionD > ChebySmooth(lo,95,ords[o],FineHermOp); // 311
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CG smooth 11 iter:
|
||||||
|
slurm-403825.out:Grid : Message : 4369.824339 s : HDCG: fPcg converged in 215 iterations 3.0
|
||||||
|
slurm-403908.out:Grid : Message : 3955.897470 s : HDCG: fPcg converged in 236 iterations 1.0
|
||||||
|
slurm-404273.out:Grid : Message : 3843.792191 s : HDCG: fPcg converged in 210 iterations 2.0
|
||||||
|
* CG smooth 9 iter:
|
||||||
|
*/
|
||||||
|
//
|
||||||
|
RealD MirsShift = lo;
|
||||||
|
ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
|
||||||
|
CGSmoother<LatticeFermionD> CGsmooth(ords[o],ShiftedFineHermOp) ;
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Build a HDCG solver
|
||||||
|
//////////////////////////////////////////
|
||||||
|
/* TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
|
||||||
|
HDCG(1.0e-8, 700,
|
||||||
|
FineHermOp,
|
||||||
|
CGsmooth,
|
||||||
|
HPDSolveSloppy,
|
||||||
|
HPDSolve,
|
||||||
|
Aggregates);
|
||||||
|
result=Zero();
|
||||||
|
*/
|
||||||
|
// std::cout << "Calling HDCG"<<std::endl;
|
||||||
|
// HDCG(src,result);
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Build a HDCG mrhs solver
|
||||||
|
//////////////////////////////////////////
|
||||||
|
MemoryManager::Print();
|
||||||
|
DoNothingGuesser<CoarseVector> DoNothing;
|
||||||
|
HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,coarseCG,DoNothing);
|
||||||
|
TwoLevelADEF2mrhs<LatticeFermion,CoarseVector,Subspace>
|
||||||
|
HDCGmrhs(1.0e-8, 700,
|
||||||
|
FineHermOp,
|
||||||
|
CGsmooth,
|
||||||
|
HPDSolveSloppy, // Never used
|
||||||
|
HPDSolve, // Used in Vstart
|
||||||
|
HPDSolveMrhs, // Used in M1
|
||||||
|
DeflCoarseGuesser, // single RHS guess used in M1
|
||||||
|
CoarseMrhs, // Grid needed to Mrhs grid
|
||||||
|
Aggregates);
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
std::cout << "Calling mRHS HDCG"<<std::endl;
|
||||||
|
FrbGrid->Barrier();
|
||||||
|
|
||||||
|
MemoryManager::Print();
|
||||||
|
std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid);
|
||||||
|
std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid);
|
||||||
|
MemoryManager::Print();
|
||||||
|
for(int r=0;r<nrhs;r++){
|
||||||
|
random(RNG5,src_mrhs[r]);
|
||||||
|
res_mrhs[r]=Zero();
|
||||||
|
std::cout << "Setup mrhs source "<<r<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "Calling the mRHS HDCG"<<std::endl;
|
||||||
|
MemoryManager::Print();
|
||||||
|
HDCGmrhs(src_mrhs,res_mrhs);
|
||||||
|
MemoryManager::Print();
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Standard CG
|
||||||
|
result=Zero();
|
||||||
|
CGfine(HermOpEO, src, result);
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user