mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-16 14:57:05 +01:00
735 lines
24 KiB
C++
735 lines
24 KiB
C++
/*************************************************************************************
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./lib/algorithms/iterative/AdefGeneric.h
|
|
|
|
Copyright (C) 2015
|
|
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
#pragma once
|
|
|
|
|
|
/*
|
|
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
|
|
* Script A = SolverMatrix
|
|
* Script P = Preconditioner
|
|
*
|
|
* Implement ADEF-2
|
|
*
|
|
* Vstart = P^Tx + Qb
|
|
* M1 = P^TM + Q
|
|
* M2=M3=1
|
|
*/
|
|
NAMESPACE_BEGIN(Grid);
|
|
|
|
|
|
template<class Field>
|
|
class TwoLevelCGmrhs
|
|
{
|
|
public:
|
|
RealD Tolerance;
|
|
Integer MaxIterations;
|
|
GridBase *grid;
|
|
|
|
// Fine operator, Smoother, CoarseSolver
|
|
LinearOperatorBase<Field> &_FineLinop;
|
|
LinearFunction<Field> &_Smoother;
|
|
MultiRHSBlockCGLinalg<Field> _BlockCGLinalg;
|
|
|
|
GridStopWatch ProjectTimer;
|
|
GridStopWatch PromoteTimer;
|
|
GridStopWatch DeflateTimer;
|
|
GridStopWatch CoarseTimer;
|
|
GridStopWatch FineTimer;
|
|
GridStopWatch SmoothTimer;
|
|
GridStopWatch InsertTimer;
|
|
|
|
/*
|
|
Field rrr;
|
|
Field sss;
|
|
Field qqq;
|
|
Field zzz;
|
|
*/
|
|
// more most opertor functions
|
|
TwoLevelCGmrhs(RealD tol,
|
|
Integer maxit,
|
|
LinearOperatorBase<Field> &FineLinop,
|
|
LinearFunction<Field> &Smoother,
|
|
GridBase *fine) :
|
|
Tolerance(tol),
|
|
MaxIterations(maxit),
|
|
_FineLinop(FineLinop),
|
|
_Smoother(Smoother)
|
|
/*
|
|
rrr(fine),
|
|
sss(fine),
|
|
qqq(fine),
|
|
zzz(fine)
|
|
*/
|
|
{
|
|
grid = fine;
|
|
};
|
|
|
|
// Vector case
|
|
virtual void operator() (std::vector<Field> &src, std::vector<Field> &x)
|
|
{
|
|
// SolveSingleSystem(src,x);
|
|
SolvePrecBlockCG(src,x);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Thin QR factorisation (google it)
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
//Dimensions
|
|
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
|
|
//
|
|
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
|
|
//
|
|
// Q C = R => Q = R C^{-1}
|
|
//
|
|
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
|
|
//
|
|
// Set C = L^{dag}, and then Q^dag Q = ident
|
|
//
|
|
// Checks:
|
|
// Cdag C = Rdag R ; passes.
|
|
// QdagQ = 1 ; passes
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
void ThinQRfact (Eigen::MatrixXcd &m_zz,
|
|
Eigen::MatrixXcd &C,
|
|
Eigen::MatrixXcd &Cinv,
|
|
std::vector<Field> & Q,
|
|
std::vector<Field> & MQ,
|
|
const std::vector<Field> & Z,
|
|
const std::vector<Field> & MZ)
|
|
{
|
|
RealD t0=usecond();
|
|
_BlockCGLinalg.InnerProductMatrix(m_zz,MZ,Z);
|
|
RealD t1=usecond();
|
|
|
|
m_zz = 0.5*(m_zz+m_zz.adjoint());
|
|
|
|
Eigen::MatrixXcd L = m_zz.llt().matrixL();
|
|
|
|
C = L.adjoint();
|
|
Cinv = C.inverse();
|
|
|
|
RealD t3=usecond();
|
|
_BlockCGLinalg.MulMatrix( Q,Cinv,Z);
|
|
_BlockCGLinalg.MulMatrix(MQ,Cinv,MZ);
|
|
RealD t4=usecond();
|
|
std::cout << " ThinQRfact IP :"<< t1-t0<<" us"<<std::endl;
|
|
std::cout << " ThinQRfact Eigen :"<< t3-t1<<" us"<<std::endl;
|
|
std::cout << " ThinQRfact MulMat:"<< t4-t3<<" us"<<std::endl;
|
|
}
|
|
|
|
virtual void SolvePrecBlockCG (std::vector<Field> &src, std::vector<Field> &X)
|
|
{
|
|
std::cout << GridLogMessage<<"HDCG: mrhs fPrecBlockcg starting"<<std::endl;
|
|
src[0].Grid()->Barrier();
|
|
int nrhs = src.size();
|
|
// std::vector<RealD> f(nrhs);
|
|
// std::vector<RealD> rtzp(nrhs);
|
|
// std::vector<RealD> rtz(nrhs);
|
|
// std::vector<RealD> a(nrhs);
|
|
// std::vector<RealD> d(nrhs);
|
|
// std::vector<RealD> b(nrhs);
|
|
// std::vector<RealD> rptzp(nrhs);
|
|
|
|
////////////////////////////////////////////
|
|
//Initial residual computation & set up
|
|
////////////////////////////////////////////
|
|
std::vector<RealD> ssq(nrhs);
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
ssq[rhs]=norm2(src[rhs]); assert(ssq[rhs]!=0.0);
|
|
}
|
|
|
|
///////////////////////////
|
|
// Fields -- eliminate duplicates between fPcg and block cg
|
|
///////////////////////////
|
|
std::vector<Field> Mtmp(nrhs,grid);
|
|
std::vector<Field> tmp(nrhs,grid);
|
|
std::vector<Field> Z(nrhs,grid); // Rename Z to R
|
|
std::vector<Field> MZ(nrhs,grid); // Rename MZ to Z
|
|
std::vector<Field> Q(nrhs,grid); //
|
|
std::vector<Field> MQ(nrhs,grid); // Rename to P
|
|
std::vector<Field> D(nrhs,grid);
|
|
std::vector<Field> AD(nrhs,grid);
|
|
|
|
/************************************************************************
|
|
* Preconditioned Block conjugate gradient rQ
|
|
* Generalise Sebastien Birk Thesis, after Dubrulle 2001.
|
|
* Introduce preconditioning following Saad Ch9
|
|
************************************************************************
|
|
* Dimensions:
|
|
*
|
|
* X,B etc... ==(Nferm x nrhs)
|
|
* Matrix A==(Nferm x Nferm)
|
|
*
|
|
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
|
* QC => Thin QR factorisation (google it)
|
|
*
|
|
* R = B-AX
|
|
* Z = Mi R
|
|
* QC = Z
|
|
* D = Q
|
|
* for k:
|
|
* R = AD
|
|
* Z = Mi R
|
|
* M = [D^dag R]^{-1}
|
|
* X = X + D M C
|
|
* QS = Q - Z.M
|
|
* D = Q + D S^dag
|
|
* C = S C
|
|
*/
|
|
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_zz = Eigen::MatrixXcd::Zero(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(nrhs,nrhs);
|
|
|
|
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(nrhs,nrhs);
|
|
|
|
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(nrhs,nrhs);
|
|
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(nrhs,nrhs);
|
|
|
|
GridStopWatch HDCGTimer;
|
|
|
|
//////////////////////////
|
|
// x0 = Vstart -- possibly modify guess
|
|
//////////////////////////
|
|
Vstart(X,src);
|
|
|
|
//////////////////////////
|
|
// R = B-AX
|
|
//////////////////////////
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
// r0 = b -A x0
|
|
_FineLinop.HermOp(X[rhs],tmp[rhs]);
|
|
axpy (Z[rhs], -1.0,tmp[rhs], src[rhs]); // Computes R=Z=src - A X0
|
|
}
|
|
|
|
//////////////////////////////////
|
|
// Compute MZ = M1 Z = M1 B - M1 A x0
|
|
//////////////////////////////////
|
|
PcgM1(Z,MZ);
|
|
|
|
//////////////////////////////////
|
|
// QC = Z
|
|
//////////////////////////////////
|
|
ThinQRfact (m_zz, m_C, m_Cinv, Q, MQ, Z, MZ);
|
|
|
|
//////////////////////////////////
|
|
// D=MQ
|
|
//////////////////////////////////
|
|
for(int b=0;b<nrhs;b++) D[b]=MQ[b]; // LLT rotation of the MZ basis of search dirs
|
|
|
|
std::cout << GridLogMessage<<"PrecBlockCGrQ vec computed initial residual and QR fact " <<std::endl;
|
|
|
|
ProjectTimer.Reset();
|
|
PromoteTimer.Reset();
|
|
DeflateTimer.Reset();
|
|
CoarseTimer.Reset();
|
|
SmoothTimer.Reset();
|
|
FineTimer.Reset();
|
|
InsertTimer.Reset();
|
|
|
|
GridStopWatch M1Timer;
|
|
GridStopWatch M2Timer;
|
|
GridStopWatch M3Timer;
|
|
GridStopWatch LinalgTimer;
|
|
GridStopWatch InnerProdTimer;
|
|
|
|
HDCGTimer.Start();
|
|
|
|
std::vector<RealD> rn(nrhs);
|
|
for (int k=0;k<=MaxIterations;k++){
|
|
|
|
////////////////////
|
|
// Z = AD
|
|
////////////////////
|
|
M3Timer.Start();
|
|
for(int b=0;b<nrhs;b++) _FineLinop.HermOp(D[b], Z[b]);
|
|
M3Timer.Stop();
|
|
|
|
////////////////////
|
|
// MZ = M1 Z <==== the Multigrid preconditioner
|
|
////////////////////
|
|
M1Timer.Start();
|
|
PcgM1(Z,MZ);
|
|
M1Timer.Stop();
|
|
|
|
FineTimer.Start();
|
|
////////////////////
|
|
// M = [D^dag Z]^{-1} = (<Ddag MZ>_M)^{-1} inner prod, generalising Saad derivation of Precon CG
|
|
////////////////////
|
|
InnerProdTimer.Start();
|
|
_BlockCGLinalg.InnerProductMatrix(m_DZ,D,Z);
|
|
InnerProdTimer.Stop();
|
|
m_M = m_DZ.inverse();
|
|
|
|
///////////////////////////
|
|
// X = X + D MC
|
|
///////////////////////////
|
|
m_tmp = m_M * m_C;
|
|
LinalgTimer.Start();
|
|
_BlockCGLinalg.MaddMatrix(X,m_tmp, D,X); // D are the search directions and X takes the updates
|
|
LinalgTimer.Stop();
|
|
|
|
///////////////////////////
|
|
// QS = Q - M Z
|
|
// (MQ) S = MQ - M (M1Z)
|
|
///////////////////////////
|
|
LinalgTimer.Start();
|
|
_BlockCGLinalg.MaddMatrix(tmp ,m_M, Z, Q,-1.0);
|
|
_BlockCGLinalg.MaddMatrix(Mtmp,m_M,MZ,MQ,-1.0);
|
|
ThinQRfact (m_zz, m_S, m_Sinv, Q, MQ, tmp, Mtmp);
|
|
LinalgTimer.Stop();
|
|
|
|
////////////////////////////
|
|
// D = MQ + D S^dag
|
|
////////////////////////////
|
|
m_tmp = m_S.adjoint();
|
|
LinalgTimer.Start();
|
|
_BlockCGLinalg.MaddMatrix(D,m_tmp,D,MQ);
|
|
LinalgTimer.Stop();
|
|
|
|
////////////////////////////
|
|
// C = S C
|
|
////////////////////////////
|
|
m_C = m_S*m_C;
|
|
|
|
////////////////////////////
|
|
// convergence monitor
|
|
////////////////////////////
|
|
m_rr = m_C.adjoint() * m_C;
|
|
|
|
FineTimer.Stop();
|
|
|
|
RealD max_resid=0;
|
|
RealD rrsum=0;
|
|
RealD sssum=0;
|
|
RealD rr;
|
|
|
|
for(int b=0;b<nrhs;b++) {
|
|
rrsum+=real(m_rr(b,b));
|
|
sssum+=ssq[b];
|
|
rr = real(m_rr(b,b))/ssq[b];
|
|
if ( rr > max_resid ) max_resid = rr;
|
|
}
|
|
std::cout << GridLogMessage <<
|
|
"\t Prec BlockCGrQ Iteration "<<k<<" ave resid "<< std::sqrt(rrsum/sssum) << " max "<< std::sqrt(max_resid) <<std::endl;
|
|
|
|
|
|
if ( max_resid < Tolerance*Tolerance ) {
|
|
|
|
HDCGTimer.Stop();
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Linalg "<<LinalgTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : fine H "<<M3Timer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : prec M1 "<<M1Timer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"**** M1 breakdown:"<<std::endl;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Project "<<ProjectTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Promote "<<PromoteTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Deflate "<<DeflateTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Coarse "<<CoarseTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Fine "<<FineTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Smooth "<<SmoothTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs PrecBlockCGrQ : Insert "<<InsertTimer.Elapsed()<<std::endl;;
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
|
|
_FineLinop.HermOp(X[rhs],tmp[rhs]);
|
|
|
|
Field mytmp(grid);
|
|
axpy(mytmp,-1.0,src[rhs],tmp[rhs]);
|
|
|
|
RealD xnorm = sqrt(norm2(X[rhs]));
|
|
RealD srcnorm = sqrt(norm2(src[rhs]));
|
|
RealD tmpnorm = sqrt(norm2(mytmp));
|
|
RealD true_residual = tmpnorm/srcnorm;
|
|
std::cout<<GridLogMessage
|
|
<<"HDCG: true residual ["<<rhs<<"] is "<<true_residual
|
|
<<" solution "<<xnorm
|
|
<<" source "<<srcnorm
|
|
<<std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
}
|
|
HDCGTimer.Stop();
|
|
std::cout<<GridLogMessage<<"HDCG: PrecBlockCGrQ not converged "<<HDCGTimer.Elapsed()<<std::endl;
|
|
assert(0);
|
|
}
|
|
|
|
virtual void SolveSingleSystem (std::vector<Field> &src, std::vector<Field> &x)
|
|
{
|
|
std::cout << GridLogMessage<<"HDCG: mrhs fPcg starting"<<std::endl;
|
|
src[0].Grid()->Barrier();
|
|
int nrhs = src.size();
|
|
std::vector<RealD> f(nrhs);
|
|
std::vector<RealD> rtzp(nrhs);
|
|
std::vector<RealD> rtz(nrhs);
|
|
std::vector<RealD> a(nrhs);
|
|
std::vector<RealD> d(nrhs);
|
|
std::vector<RealD> b(nrhs);
|
|
std::vector<RealD> rptzp(nrhs);
|
|
/////////////////////////////
|
|
// Set up history vectors
|
|
/////////////////////////////
|
|
int mmax = 3;
|
|
|
|
std::vector<std::vector<Field> > p(nrhs); for(int r=0;r<nrhs;r++) p[r].resize(mmax,grid);
|
|
std::vector<std::vector<Field> > mmp(nrhs); for(int r=0;r<nrhs;r++) mmp[r].resize(mmax,grid);
|
|
std::vector<std::vector<RealD> > pAp(nrhs); for(int r=0;r<nrhs;r++) pAp[r].resize(mmax);
|
|
|
|
std::vector<Field> z(nrhs,grid);
|
|
std::vector<Field> mp (nrhs,grid);
|
|
std::vector<Field> r (nrhs,grid);
|
|
std::vector<Field> mu (nrhs,grid);
|
|
|
|
//Initial residual computation & set up
|
|
std::vector<RealD> src_nrm(nrhs);
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
src_nrm[rhs]=norm2(src[rhs]);
|
|
assert(src_nrm[rhs]!=0.0);
|
|
}
|
|
std::vector<RealD> tn(nrhs);
|
|
|
|
GridStopWatch HDCGTimer;
|
|
//////////////////////////
|
|
// x0 = Vstart -- possibly modify guess
|
|
//////////////////////////
|
|
Vstart(x,src);
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
// r0 = b -A x0
|
|
_FineLinop.HermOp(x[rhs],mmp[rhs][0]);
|
|
axpy (r[rhs], -1.0,mmp[rhs][0], src[rhs]); // Recomputes r=src-Ax0
|
|
}
|
|
|
|
//////////////////////////////////
|
|
// Compute z = M1 x
|
|
//////////////////////////////////
|
|
// This needs a multiRHS version for acceleration
|
|
PcgM1(r,z);
|
|
|
|
std::vector<RealD> ssq(nrhs);
|
|
std::vector<RealD> rsq(nrhs);
|
|
std::vector<Field> pp(nrhs,grid);
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
rtzp[rhs] =real(innerProduct(r[rhs],z[rhs]));
|
|
p[rhs][0]=z[rhs];
|
|
ssq[rhs]=norm2(src[rhs]);
|
|
rsq[rhs]= ssq[rhs]*Tolerance*Tolerance;
|
|
// std::cout << GridLogMessage<<"mrhs HDCG: "<<rhs<<" k=0 residual "<<rtzp[rhs]<<" rsq "<<rsq[rhs]<<"\n";
|
|
}
|
|
|
|
ProjectTimer.Reset();
|
|
PromoteTimer.Reset();
|
|
DeflateTimer.Reset();
|
|
CoarseTimer.Reset();
|
|
SmoothTimer.Reset();
|
|
FineTimer.Reset();
|
|
InsertTimer.Reset();
|
|
|
|
GridStopWatch M1Timer;
|
|
GridStopWatch M2Timer;
|
|
GridStopWatch M3Timer;
|
|
GridStopWatch LinalgTimer;
|
|
|
|
HDCGTimer.Start();
|
|
|
|
std::vector<RealD> rn(nrhs);
|
|
for (int k=0;k<=MaxIterations;k++){
|
|
|
|
int peri_k = k % mmax;
|
|
int peri_kp = (k+1) % mmax;
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
rtz[rhs]=rtzp[rhs];
|
|
M3Timer.Start();
|
|
d[rhs]= PcgM3(p[rhs][peri_k],mmp[rhs][peri_k]);
|
|
M3Timer.Stop();
|
|
a[rhs] = rtz[rhs]/d[rhs];
|
|
|
|
LinalgTimer.Start();
|
|
// Memorise this
|
|
pAp[rhs][peri_k] = d[rhs];
|
|
|
|
axpy(x[rhs],a[rhs],p[rhs][peri_k],x[rhs]);
|
|
rn[rhs] = axpy_norm(r[rhs],-a[rhs],mmp[rhs][peri_k],r[rhs]);
|
|
LinalgTimer.Stop();
|
|
}
|
|
|
|
// Compute z = M x (for *all* RHS)
|
|
M1Timer.Start();
|
|
PcgM1(r,z);
|
|
M1Timer.Stop();
|
|
|
|
RealD max_rn=0.0;
|
|
LinalgTimer.Start();
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
|
|
rtzp[rhs] =real(innerProduct(r[rhs],z[rhs]));
|
|
|
|
// std::cout << GridLogMessage<<"HDCG::fPcg rhs"<<rhs<<" iteration "<<k<<" : inner rtzp "<<rtzp[rhs]<<"\n";
|
|
mu[rhs]=z[rhs];
|
|
|
|
p[rhs][peri_kp]=mu[rhs];
|
|
|
|
// Standard search direction p == z + b p
|
|
b[rhs] = (rtzp[rhs])/rtz[rhs];
|
|
|
|
int northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
|
for(int back=0; back < northog; back++){
|
|
int peri_back = (k-back)%mmax;
|
|
RealD pbApk= real(innerProduct(mmp[rhs][peri_back],p[rhs][peri_kp]));
|
|
RealD beta = -pbApk/pAp[rhs][peri_back];
|
|
axpy(p[rhs][peri_kp],beta,p[rhs][peri_back],p[rhs][peri_kp]);
|
|
}
|
|
|
|
RealD rrn=sqrt(rn[rhs]/ssq[rhs]);
|
|
RealD rtn=sqrt(rtz[rhs]/ssq[rhs]);
|
|
RealD rtnp=sqrt(rtzp[rhs]/ssq[rhs]);
|
|
|
|
std::cout<<GridLogMessage<<"HDCG:fPcg rhs "<<rhs<<" k= "<<k<<" residual = "<<rrn<<"\n";
|
|
if ( rrn > max_rn ) max_rn = rrn;
|
|
}
|
|
LinalgTimer.Stop();
|
|
|
|
// Stopping condition based on worst case
|
|
if ( max_rn <= Tolerance ) {
|
|
|
|
HDCGTimer.Stop();
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg converged in "<<k<<" iterations and "<<HDCGTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Linalg "<<LinalgTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : fine M3 "<<M3Timer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : prec M1 "<<M1Timer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"**** M1 breakdown:"<<std::endl;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Project "<<ProjectTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Promote "<<PromoteTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Deflate "<<DeflateTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Coarse "<<CoarseTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Fine "<<FineTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Smooth "<<SmoothTimer.Elapsed()<<std::endl;;
|
|
std::cout<<GridLogMessage<<"HDCG: mrhs fPcg : Insert "<<InsertTimer.Elapsed()<<std::endl;;
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
_FineLinop.HermOp(x[rhs],mmp[rhs][0]);
|
|
Field tmp(grid);
|
|
axpy(tmp,-1.0,src[rhs],mmp[rhs][0]);
|
|
|
|
RealD mmpnorm = sqrt(norm2(mmp[rhs][0]));
|
|
RealD xnorm = sqrt(norm2(x[rhs]));
|
|
RealD srcnorm = sqrt(norm2(src[rhs]));
|
|
RealD tmpnorm = sqrt(norm2(tmp));
|
|
RealD true_residual = tmpnorm/srcnorm;
|
|
std::cout<<GridLogMessage
|
|
<<"HDCG: true residual ["<<rhs<<"] is "<<true_residual
|
|
<<" solution "<<xnorm
|
|
<<" source "<<srcnorm
|
|
<<" mmp "<<mmpnorm
|
|
<<std::endl;
|
|
}
|
|
return;
|
|
}
|
|
|
|
}
|
|
HDCGTimer.Stop();
|
|
std::cout<<GridLogMessage<<"HDCG: not converged "<<HDCGTimer.Elapsed()<<std::endl;
|
|
for(int rhs=0;rhs<nrhs;rhs++){
|
|
RealD xnorm = sqrt(norm2(x[rhs]));
|
|
RealD srcnorm = sqrt(norm2(src[rhs]));
|
|
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
|
|
}
|
|
}
|
|
|
|
|
|
public:
|
|
|
|
virtual void PcgM1(std::vector<Field> & in,std::vector<Field> & out) = 0;
|
|
virtual void Vstart(std::vector<Field> & x,std::vector<Field> & src) = 0;
|
|
virtual void PcgM2(const Field & in, Field & out) {
|
|
out=in;
|
|
}
|
|
|
|
virtual RealD PcgM3(const Field & p, Field & mmp){
|
|
RealD dd;
|
|
_FineLinop.HermOp(p,mmp);
|
|
ComplexD dot = innerProduct(p,mmp);
|
|
dd=real(dot);
|
|
return dd;
|
|
}
|
|
|
|
};
|
|
|
|
template<class Field, class CoarseField>
|
|
class TwoLevelADEF2mrhs : public TwoLevelCGmrhs<Field>
|
|
{
|
|
public:
|
|
GridBase *coarsegrid;
|
|
GridBase *coarsegridmrhs;
|
|
LinearFunction<CoarseField> &_CoarseSolverMrhs;
|
|
LinearFunction<CoarseField> &_CoarseSolverPreciseMrhs;
|
|
MultiRHSBlockProject<Field> &_Projector;
|
|
MultiRHSDeflation<CoarseField> &_Deflator;
|
|
|
|
|
|
TwoLevelADEF2mrhs(RealD tol,
|
|
Integer maxit,
|
|
LinearOperatorBase<Field> &FineLinop,
|
|
LinearFunction<Field> &Smoother,
|
|
LinearFunction<CoarseField> &CoarseSolverMrhs,
|
|
LinearFunction<CoarseField> &CoarseSolverPreciseMrhs,
|
|
MultiRHSBlockProject<Field> &Projector,
|
|
MultiRHSDeflation<CoarseField> &Deflator,
|
|
GridBase *_coarsemrhsgrid) :
|
|
TwoLevelCGmrhs<Field>(tol, maxit,FineLinop,Smoother,Projector.fine_grid),
|
|
_CoarseSolverMrhs(CoarseSolverMrhs),
|
|
_CoarseSolverPreciseMrhs(CoarseSolverPreciseMrhs),
|
|
_Projector(Projector),
|
|
_Deflator(Deflator)
|
|
{
|
|
coarsegrid = Projector.coarse_grid;
|
|
coarsegridmrhs = _coarsemrhsgrid;// Thi could be in projector
|
|
};
|
|
|
|
// Override Vstart
|
|
virtual void Vstart(std::vector<Field> & x,std::vector<Field> & src)
|
|
{
|
|
int nrhs=x.size();
|
|
///////////////////////////////////
|
|
// Choose x_0 such that
|
|
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
|
|
// = [1 - Ass_inv A] Guess + Assinv src
|
|
// = P^T guess + Assinv src
|
|
// = Vstart [Tang notation]
|
|
// This gives:
|
|
// W^T (src - A x_0) = src_s - A guess_s - r_s
|
|
// = src_s - (A guess)_s - src_s + (A guess)_s
|
|
// = 0
|
|
///////////////////////////////////
|
|
std::vector<CoarseField> PleftProj(nrhs,this->coarsegrid);
|
|
std::vector<CoarseField> PleftMss_proj(nrhs,this->coarsegrid);
|
|
CoarseField PleftProjMrhs(this->coarsegridmrhs);
|
|
CoarseField PleftMss_projMrhs(this->coarsegridmrhs);
|
|
|
|
this->_Projector.blockProject(src,PleftProj);
|
|
this->_Deflator.DeflateSources(PleftProj,PleftMss_proj);
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
InsertSliceFast(PleftProj[rhs],PleftProjMrhs,rhs,0);
|
|
InsertSliceFast(PleftMss_proj[rhs],PleftMss_projMrhs,rhs,0); // the guess
|
|
}
|
|
|
|
this->_CoarseSolverPreciseMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} r_s
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
ExtractSliceFast(PleftMss_proj[rhs],PleftMss_projMrhs,rhs,0);
|
|
}
|
|
this->_Projector.blockPromote(x,PleftMss_proj);
|
|
}
|
|
|
|
virtual void PcgM1(std::vector<Field> & in,std::vector<Field> & out){
|
|
|
|
int nrhs=in.size();
|
|
|
|
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
|
std::vector<Field> tmp(nrhs,this->grid);
|
|
std::vector<Field> Min(nrhs,this->grid);
|
|
|
|
std::vector<CoarseField> PleftProj(nrhs,this->coarsegrid);
|
|
std::vector<CoarseField> PleftMss_proj(nrhs,this->coarsegrid);
|
|
|
|
CoarseField PleftProjMrhs(this->coarsegridmrhs);
|
|
CoarseField PleftMss_projMrhs(this->coarsegridmrhs);
|
|
|
|
// this->rrr=in[0];
|
|
|
|
#undef SMOOTHER_BLOCK_SOLVE
|
|
#if SMOOTHER_BLOCK_SOLVE
|
|
this->SmoothTimer.Start();
|
|
this->_Smoother(in,Min);
|
|
this->SmoothTimer.Stop();
|
|
#else
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
this->SmoothTimer.Start();
|
|
this->_Smoother(in[rhs],Min[rhs]);
|
|
this->SmoothTimer.Stop();
|
|
}
|
|
#endif
|
|
// this->sss=Min[0];
|
|
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
|
|
this->FineTimer.Start();
|
|
this->_FineLinop.HermOp(Min[rhs],out[rhs]);
|
|
axpy(tmp[rhs],-1.0,out[rhs],in[rhs]); // resid = in - A Min
|
|
this->FineTimer.Stop();
|
|
|
|
}
|
|
|
|
this->ProjectTimer.Start();
|
|
this->_Projector.blockProject(tmp,PleftProj);
|
|
this->ProjectTimer.Stop();
|
|
this->DeflateTimer.Start();
|
|
this->_Deflator.DeflateSources(PleftProj,PleftMss_proj);
|
|
this->DeflateTimer.Stop();
|
|
this->InsertTimer.Start();
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
InsertSliceFast(PleftProj[rhs],PleftProjMrhs,rhs,0);
|
|
InsertSliceFast(PleftMss_proj[rhs],PleftMss_projMrhs,rhs,0); // the guess
|
|
}
|
|
this->InsertTimer.Stop();
|
|
|
|
this->CoarseTimer.Start();
|
|
this->_CoarseSolverMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} [in - A Min]_s
|
|
this->CoarseTimer.Stop();
|
|
|
|
this->InsertTimer.Start();
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
ExtractSliceFast(PleftMss_proj[rhs],PleftMss_projMrhs,rhs,0);
|
|
}
|
|
this->InsertTimer.Stop();
|
|
this->PromoteTimer.Start();
|
|
this->_Projector.blockPromote(tmp,PleftMss_proj);// tmp= Q[in - A Min]
|
|
this->PromoteTimer.Stop();
|
|
this->FineTimer.Start();
|
|
// this->qqq=tmp[0];
|
|
for(int rhs=0;rhs<nrhs;rhs++) {
|
|
axpy(out[rhs],1.0,Min[rhs],tmp[rhs]); // Min+tmp
|
|
}
|
|
// this->zzz=out[0];
|
|
this->FineTimer.Stop();
|
|
}
|
|
};
|
|
|
|
|
|
NAMESPACE_END(Grid);
|
|
|
|
|