From c745484e65f3479a717539a25d1ede24c9ef2e6f Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 1 Apr 2024 14:14:30 -0400 Subject: [PATCH] 9.5x speed up version --- Grid/algorithms/iterative/AdefMrhs.h | 414 +++++++++++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100644 Grid/algorithms/iterative/AdefMrhs.h diff --git a/Grid/algorithms/iterative/AdefMrhs.h b/Grid/algorithms/iterative/AdefMrhs.h new file mode 100644 index 00000000..e498a291 --- /dev/null +++ b/Grid/algorithms/iterative/AdefMrhs.h @@ -0,0 +1,414 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/algorithms/iterative/AdefGeneric.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#pragma once + + + /* + * Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv. + * Script A = SolverMatrix + * Script P = Preconditioner + * + * Implement ADEF-2 + * + * Vstart = P^Tx + Qb + * M1 = P^TM + Q + * M2=M3=1 + */ +NAMESPACE_BEGIN(Grid); + + +template +class TwoLevelCGmrhs +{ + public: + RealD Tolerance; + Integer MaxIterations; + GridBase *grid; + + // Fine operator, Smoother, CoarseSolver + LinearOperatorBase &_FineLinop; + LinearFunction &_Smoother; + + GridStopWatch ProjectTimer; + GridStopWatch PromoteTimer; + GridStopWatch DeflateTimer; + GridStopWatch CoarseTimer; + GridStopWatch FineTimer; + GridStopWatch SmoothTimer; + GridStopWatch InsertTimer; + + + // more most opertor functions + TwoLevelCGmrhs(RealD tol, + Integer maxit, + LinearOperatorBase &FineLinop, + LinearFunction &Smoother, + GridBase *fine) : + Tolerance(tol), + MaxIterations(maxit), + _FineLinop(FineLinop), + _Smoother(Smoother) + { + grid = fine; + }; + + // Vector case + virtual void operator() (std::vector &src, std::vector &x) + { + std::cout << GridLogMessage<<"HDCG: mrhs fPcg starting"<Barrier(); + int nrhs = src.size(); + std::vector f(nrhs); + std::vector rtzp(nrhs); + std::vector rtz(nrhs); + std::vector a(nrhs); + std::vector d(nrhs); + std::vector b(nrhs); + std::vector rptzp(nrhs); + ///////////////////////////// + // Set up history vectors + ///////////////////////////// + int mmax = 3; + + std::vector > p(nrhs); for(int r=0;r > mmp(nrhs); for(int r=0;r > pAp(nrhs); for(int r=0;r z(nrhs,grid); + std::vector mp (nrhs,grid); + std::vector r (nrhs,grid); + std::vector mu (nrhs,grid); + + //Initial residual computation & set up + std::vector src_nrm(nrhs); + for(int rhs=0;rhs tn(nrhs); + + GridStopWatch HDCGTimer; + ////////////////////////// + // x0 = Vstart -- possibly modify guess + ////////////////////////// + Vstart(x,src); + + for(int rhs=0;rhs ssq(nrhs); + std::vector rsq(nrhs); + std::vector pp(nrhs,grid); + + for(int rhs=0;rhs rn(nrhs); + for (int k=0;k<=MaxIterations;k++){ + + int peri_k = k % mmax; + int peri_kp = (k+1) % mmax; + + for(int rhs=0;rhsmmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm + for(int back=0; back < northog; back++){ + int peri_back = (k-back)%mmax; + RealD pbApk= real(innerProduct(mmp[rhs][peri_back],p[rhs][peri_kp])); + RealD beta = -pbApk/pAp[rhs][peri_back]; + axpy(p[rhs][peri_kp],beta,p[rhs][peri_back],p[rhs][peri_kp]); + } + + RealD rrn=sqrt(rn[rhs]/ssq[rhs]); + RealD rtn=sqrt(rtz[rhs]/ssq[rhs]); + RealD rtnp=sqrt(rtzp[rhs]/ssq[rhs]); + + std::cout< max_rn ) max_rn = rrn; + } + LinalgTimer.Stop(); + + // Stopping condition based on worst case + if ( max_rn <= Tolerance ) { + + HDCGTimer.Stop(); + std::cout< & in,std::vector & out) = 0; + virtual void Vstart(std::vector & x,std::vector & src) = 0; + virtual void PcgM2(const Field & in, Field & out) { + out=in; + } + + virtual RealD PcgM3(const Field & p, Field & mmp){ + RealD dd; + _FineLinop.HermOp(p,mmp); + ComplexD dot = innerProduct(p,mmp); + dd=real(dot); + return dd; + } + +}; + +template +class TwoLevelADEF2mrhs : public TwoLevelCGmrhs +{ +public: + GridBase *coarsegrid; + GridBase *coarsegridmrhs; + LinearFunction &_CoarseSolverMrhs; + LinearFunction &_CoarseSolverPreciseMrhs; + MultiRHSBlockProject &_Projector; + MultiRHSDeflation &_Deflator; + + + TwoLevelADEF2mrhs(RealD tol, + Integer maxit, + LinearOperatorBase &FineLinop, + LinearFunction &Smoother, + LinearFunction &CoarseSolverMrhs, + LinearFunction &CoarseSolverPreciseMrhs, + MultiRHSBlockProject &Projector, + MultiRHSDeflation &Deflator, + GridBase *_coarsemrhsgrid) : + TwoLevelCGmrhs(tol, maxit,FineLinop,Smoother,Projector.fine_grid), + _CoarseSolverMrhs(CoarseSolverMrhs), + _CoarseSolverPreciseMrhs(CoarseSolverPreciseMrhs), + _Projector(Projector), + _Deflator(Deflator) + { + coarsegrid = Projector.coarse_grid; + coarsegridmrhs = _coarsemrhsgrid;// Thi could be in projector + }; + + // Override Vstart + virtual void Vstart(std::vector & x,std::vector & src) + { + int nrhs=x.size(); + /////////////////////////////////// + // Choose x_0 such that + // x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess] + // = [1 - Ass_inv A] Guess + Assinv src + // = P^T guess + Assinv src + // = Vstart [Tang notation] + // This gives: + // W^T (src - A x_0) = src_s - A guess_s - r_s + // = src_s - (A guess)_s - src_s + (A guess)_s + // = 0 + /////////////////////////////////// + std::vector PleftProj(nrhs,this->coarsegrid); + std::vector PleftMss_proj(nrhs,this->coarsegrid); + CoarseField PleftProjMrhs(this->coarsegridmrhs); + CoarseField PleftMss_projMrhs(this->coarsegridmrhs); + + this->_Projector.blockProject(src,PleftProj); + this->_Deflator.DeflateSources(PleftProj,PleftMss_proj); + for(int rhs=0;rhs_CoarseSolverPreciseMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} r_s + + for(int rhs=0;rhs_Projector.blockPromote(x,PleftMss_proj); + } + + virtual void PcgM1(std::vector & in,std::vector & out){ + + int nrhs=in.size(); + + // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] + std::vector tmp(nrhs,this->grid); + std::vector Min(nrhs,this->grid); + + std::vector PleftProj(nrhs,this->coarsegrid); + std::vector PleftMss_proj(nrhs,this->coarsegrid); + + CoarseField PleftProjMrhs(this->coarsegridmrhs); + CoarseField PleftMss_projMrhs(this->coarsegridmrhs); + + for(int rhs=0;rhsSmoothTimer.Start(); + this->_Smoother(in[rhs],Min[rhs]); + this->SmoothTimer.Stop(); + + this->FineTimer.Start(); + this->_FineLinop.HermOp(Min[rhs],out[rhs]); + + axpy(tmp[rhs],-1.0,out[rhs],in[rhs]); // resid = in - A Min + this->FineTimer.Stop(); + + } + + this->ProjectTimer.Start(); + this->_Projector.blockProject(tmp,PleftProj); + this->ProjectTimer.Stop(); + this->DeflateTimer.Start(); + this->_Deflator.DeflateSources(PleftProj,PleftMss_proj); + this->DeflateTimer.Stop(); + this->InsertTimer.Start(); + for(int rhs=0;rhsInsertTimer.Stop(); + + this->CoarseTimer.Start(); + this->_CoarseSolverMrhs(PleftProjMrhs,PleftMss_projMrhs); // Ass^{-1} [in - A Min]_s + this->CoarseTimer.Stop(); + + this->InsertTimer.Start(); + for(int rhs=0;rhsInsertTimer.Stop(); + this->PromoteTimer.Start(); + this->_Projector.blockPromote(tmp,PleftMss_proj);// tmp= Q[in - A Min] + this->PromoteTimer.Stop(); + this->FineTimer.Start(); + for(int rhs=0;rhsFineTimer.Stop(); + } +}; + + +NAMESPACE_END(Grid); + +