/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/iterative/AdefGeneric.h Copyright (C) 2015 Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #pragma once /* * Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv. * Script A = SolverMatrix * Script P = Preconditioner * * Implement ADEF-2 * * Vstart = P^Tx + Qb * M1 = P^TM + Q * M2=M3=1 */ NAMESPACE_BEGIN(Grid); template class TwoLevelCGmrhs { public: RealD Tolerance; Integer MaxIterations; GridBase *grid; // Fine operator, Smoother, CoarseSolver LinearOperatorBase &_FineLinop; LinearFunction &_Smoother; MultiRHSBlockCGLinalg _BlockCGLinalg; GridStopWatch ProjectTimer; GridStopWatch PromoteTimer; GridStopWatch DeflateTimer; GridStopWatch CoarseTimer; GridStopWatch FineTimer; GridStopWatch SmoothTimer; GridStopWatch InsertTimer; /* Field rrr; Field sss; Field qqq; Field zzz; */ // more most opertor functions TwoLevelCGmrhs(RealD tol, Integer maxit, LinearOperatorBase &FineLinop, LinearFunction &Smoother, GridBase *fine) : Tolerance(tol), MaxIterations(maxit), _FineLinop(FineLinop), _Smoother(Smoother) /* rrr(fine), sss(fine), qqq(fine), zzz(fine) */ { grid = fine; }; // Vector case virtual void operator() (std::vector &src, std::vector &x) { // SolveSingleSystem(src,x); SolvePrecBlockCG(src,x); } //////////////////////////////////////////////////////////////////////////////////////////////////// // Thin QR factorisation (google it) //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// //Dimensions // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock // // Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen) // // Q C = R => Q = R C^{-1} // // Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} // // Set C = L^{dag}, and then Q^dag Q = ident // // Checks: // Cdag C = Rdag R ; passes. // QdagQ = 1 ; passes //////////////////////////////////////////////////////////////////////////////////////////////////// void ThinQRfact (Eigen::MatrixXcd &m_zz, Eigen::MatrixXcd &C, Eigen::MatrixXcd &Cinv, std::vector & Q, std::vector & MQ, const std::vector & Z, const std::vector & MZ) { RealD t0=usecond(); _BlockCGLinalg.InnerProductMatrix(m_zz,MZ,Z); RealD t1=usecond(); m_zz = 0.5*(m_zz+m_zz.adjoint()); Eigen::MatrixXcd L = m_zz.llt().matrixL(); C = L.adjoint(); Cinv = C.inverse(); RealD t3=usecond(); _BlockCGLinalg.MulMatrix( Q,Cinv,Z); _BlockCGLinalg.MulMatrix(MQ,Cinv,MZ); RealD t4=usecond(); std::cout << " ThinQRfact IP :"<< t1-t0<<" us"< &src, std::vector &X) { std::cout << GridLogMessage<<"HDCG: mrhs fPrecBlockcg starting"<Barrier(); int nrhs = src.size(); // std::vector f(nrhs); // std::vector rtzp(nrhs); // std::vector rtz(nrhs); // std::vector a(nrhs); // std::vector d(nrhs); // std::vector b(nrhs); // std::vector rptzp(nrhs); //////////////////////////////////////////// //Initial residual computation & set up //////////////////////////////////////////// std::vector ssq(nrhs); for(int rhs=0;rhs Mtmp(nrhs,grid); std::vector tmp(nrhs,grid); std::vector Z(nrhs,grid); // Rename Z to R std::vector MZ(nrhs,grid); // Rename MZ to Z std::vector Q(nrhs,grid); // std::vector MQ(nrhs,grid); // Rename to P std::vector D(nrhs,grid); std::vector AD(nrhs,grid); /************************************************************************ * Preconditioned Block conjugate gradient rQ * Generalise Sebastien Birk Thesis, after Dubrulle 2001. * Introduce preconditioning following Saad Ch9 ************************************************************************ * Dimensions: * * X,B etc... ==(Nferm x nrhs) * Matrix A==(Nferm x Nferm) * * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site * QC => Thin QR factorisation (google it) * * R = B-AX * Z = Mi R * QC = Z * D = Q * for k: * R = AD * Z = Mi R * M = [D^dag R]^{-1} * X = X + D M C * QS = Q - Z.M * D = Q + D S^dag * C = S C */ Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(nrhs,nrhs); Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(nrhs,nrhs); Eigen::MatrixXcd m_zz = Eigen::MatrixXcd::Zero(nrhs,nrhs); Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(nrhs,nrhs); Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(nrhs,nrhs); Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(nrhs,nrhs); Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(nrhs,nrhs); Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(nrhs,nrhs); Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(nrhs,nrhs); Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(nrhs,nrhs); GridStopWatch HDCGTimer; ////////////////////////// // x0 = Vstart -- possibly modify guess ////////////////////////// Vstart(X,src); ////////////////////////// // R = B-AX ////////////////////////// for(int rhs=0;rhs rn(nrhs); for (int k=0;k<=MaxIterations;k++){ //////////////////// // Z = AD //////////////////// M3Timer.Start(); for(int b=0;b_M)^{-1} inner prod, generalising Saad derivation of Precon CG //////////////////// InnerProdTimer.Start(); _BlockCGLinalg.InnerProductMatrix(m_DZ,D,Z); InnerProdTimer.Stop(); m_M = m_DZ.inverse(); /////////////////////////// // X = X + D MC /////////////////////////// m_tmp = m_M * m_C; LinalgTimer.Start(); _BlockCGLinalg.MaddMatrix(X,m_tmp, D,X); // D are the search directions and X takes the updates LinalgTimer.Stop(); /////////////////////////// // QS = Q - M Z // (MQ) S = MQ - M (M1Z) /////////////////////////// LinalgTimer.Start(); _BlockCGLinalg.MaddMatrix(tmp ,m_M, Z, Q,-1.0); _BlockCGLinalg.MaddMatrix(Mtmp,m_M,MZ,MQ,-1.0); ThinQRfact (m_zz, m_S, m_Sinv, Q, MQ, tmp, Mtmp); LinalgTimer.Stop(); //////////////////////////// // D = MQ + D S^dag //////////////////////////// m_tmp = m_S.adjoint(); LinalgTimer.Start(); _BlockCGLinalg.MaddMatrix(D,m_tmp,D,MQ); LinalgTimer.Stop(); //////////////////////////// // C = S C //////////////////////////// m_C = m_S*m_C; //////////////////////////// // convergence monitor //////////////////////////// m_rr = m_C.adjoint() * m_C; FineTimer.Stop(); RealD max_resid=0; RealD rrsum=0; RealD sssum=0; RealD rr; for(int b=0;b max_resid ) max_resid = rr; } std::cout << GridLogMessage << "\t Prec BlockCGrQ Iteration "< &src, std::vector &x) { std::cout << GridLogMessage<<"HDCG: mrhs fPcg starting"<Barrier(); int nrhs = src.size(); std::vector f(nrhs); std::vector rtzp(nrhs); std::vector rtz(nrhs); std::vector a(nrhs); std::vector d(nrhs); std::vector b(nrhs); std::vector rptzp(nrhs); ///////////////////////////// // Set up history vectors ///////////////////////////// int mmax = 3; std::vector > p(nrhs); for(int r=0;r > mmp(nrhs); for(int r=0;r > pAp(nrhs); for(int r=0;r z(nrhs,grid); std::vector mp (nrhs,grid); std::vector r (nrhs,grid); std::vector mu (nrhs,grid); //Initial residual computation & set up std::vector src_nrm(nrhs); for(int rhs=0;rhs tn(nrhs); GridStopWatch HDCGTimer; ////////////////////////// // x0 = Vstart -- possibly modify guess ////////////////////////// Vstart(x,src); for(int rhs=0;rhs ssq(nrhs); std::vector rsq(nrhs); std::vector pp(nrhs,grid); for(int rhs=0;rhs rn(nrhs); for (int k=0;k<=MaxIterations;k++){ int peri_k = k % mmax; int peri_kp = (k+1) % mmax; for(int rhs=0;rhsmmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm for(int back=0; back < northog; back++){ int peri_back = (k-back)%mmax; RealD pbApk= real(innerProduct(mmp[rhs][peri_back],p[rhs][peri_kp])); RealD beta = -pbApk/pAp[rhs][peri_back]; axpy(p[rhs][peri_kp],beta,p[rhs][peri_back],p[rhs][peri_kp]); } RealD rrn=sqrt(rn[rhs]/ssq[rhs]); RealD rtn=sqrt(rtz[rhs]/ssq[rhs]); RealD rtnp=sqrt(rtzp[rhs]/ssq[rhs]); std::cout< max_rn ) max_rn = rrn; } LinalgTimer.Stop(); // Stopping condition based on worst case if ( max_rn <= Tolerance ) { HDCGTimer.Stop(); std::cout< & in,std::vector & out) = 0; virtual void Vstart(std::vector & x,std::vector & src) = 0; virtual void PcgM2(const Field & in, Field & out) { out=in; } virtual RealD PcgM3(const Field & p, Field & mmp){ RealD dd; _FineLinop.HermOp(p,mmp); ComplexD dot = innerProduct(p,mmp); dd=real(dot); return dd; } }; template class TwoLevelADEF2mrhs : public TwoLevelCGmrhs { public: GridBase *coarsegrid; GridBase *coarsegridmrhs; LinearFunction &_CoarseSolverMrhs; LinearFunction &_CoarseSolverPreciseMrhs; MultiRHSBlockProject &_Projector; MultiRHSDeflation &_Deflator; TwoLevelADEF2mrhs(RealD tol, Integer maxit, LinearOperatorBase &FineLinop, LinearFunction &Smoother, LinearFunction &CoarseSolverMrhs, LinearFunction &CoarseSolverPreciseMrhs, MultiRHSBlockProject &Projector, MultiRHSDeflation &Deflator, GridBase *_coarsemrhsgrid) : TwoLevelCGmrhs(tol, maxit,FineLinop,Smoother,Projector.fine_grid), _CoarseSolverMrhs(CoarseSolverMrhs), _CoarseSolverPreciseMrhs(CoarseSolverPreciseMrhs), _Projector(Projector), _Deflator(Deflator) { coarsegrid = Projector.coarse_grid; coarsegridmrhs = _coarsemrhsgrid;// Thi could be in projector }; // Override Vstart virtual void Vstart(std::vector & x,std::vector & src) { int nrhs=x.size(); /////////////////////////////////// // Choose x_0 such that // x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] // = [1 - Ass_inv A] Guess + Assinv src // = P^T guess + Assinv src // = Vstart [Tang notation] // This gives: // W^T (src - A x_0) = src_s - A guess_s - r_s // = src_s - (A guess)_s - src_s + (A guess)_s // = 0 /////////////////////////////////// std::vector PleftProj(nrhs,this->coarsegrid); std::vector PleftMss_proj(nrhs,this->coarsegrid); CoarseField PleftProjMrhs(this->coarsegridmrhs); CoarseField PleftMss_projMrhs(this->coarsegridmrhs); this->_Projector.blockProject(src,PleftProj); this->_Deflator.DeflateSources(PleftProj,PleftMss_proj); for(int rhs=0;rhs_CoarseSolverPreciseMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} r_s for(int rhs=0;rhs_Projector.blockPromote(x,PleftMss_proj); } virtual void PcgM1(std::vector & in,std::vector & out){ int nrhs=in.size(); // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] std::vector tmp(nrhs,this->grid); std::vector Min(nrhs,this->grid); std::vector PleftProj(nrhs,this->coarsegrid); std::vector PleftMss_proj(nrhs,this->coarsegrid); CoarseField PleftProjMrhs(this->coarsegridmrhs); CoarseField PleftMss_projMrhs(this->coarsegridmrhs); // this->rrr=in[0]; #undef SMOOTHER_BLOCK_SOLVE #if SMOOTHER_BLOCK_SOLVE this->SmoothTimer.Start(); this->_Smoother(in,Min); this->SmoothTimer.Stop(); #else for(int rhs=0;rhsSmoothTimer.Start(); this->_Smoother(in[rhs],Min[rhs]); this->SmoothTimer.Stop(); } #endif // this->sss=Min[0]; for(int rhs=0;rhsFineTimer.Start(); this->_FineLinop.HermOp(Min[rhs],out[rhs]); axpy(tmp[rhs],-1.0,out[rhs],in[rhs]); // resid = in - A Min this->FineTimer.Stop(); } this->ProjectTimer.Start(); this->_Projector.blockProject(tmp,PleftProj); this->ProjectTimer.Stop(); this->DeflateTimer.Start(); this->_Deflator.DeflateSources(PleftProj,PleftMss_proj); this->DeflateTimer.Stop(); this->InsertTimer.Start(); for(int rhs=0;rhsInsertTimer.Stop(); this->CoarseTimer.Start(); this->_CoarseSolverMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} [in - A Min]_s this->CoarseTimer.Stop(); this->InsertTimer.Start(); for(int rhs=0;rhsInsertTimer.Stop(); this->PromoteTimer.Start(); this->_Projector.blockPromote(tmp,PleftMss_proj);// tmp= Q[in - A Min] this->PromoteTimer.Stop(); this->FineTimer.Start(); // this->qqq=tmp[0]; for(int rhs=0;rhszzz=out[0]; this->FineTimer.Stop(); } }; NAMESPACE_END(Grid);