/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h Copyright (C) 2017 Author: Azusa Yamaguchi Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #pragma once NAMESPACE_BEGIN(Grid); template void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector &X, const std::vector &Y){ typedef typename Field::scalar_type scomplex; int Nblock = X.size(); for(int b=0;b void MaddMatrix(std::vector &AP, Eigen::MatrixXcd &m , const std::vector &X,const std::vector &Y,RealD scale=1.0){ // Should make this cache friendly with site outermost, parallel_for // Deal with case AP aliases with either Y or X // //Could pack "X" and "AP" into a Nblock x Volume dense array. // AP(Nrhs x vol) = Y(Nrhs x vol) + scale * m(nrhs x nrhs) * X(nrhs*vol) typedef typename Field::scalar_type scomplex; int Nblock = AP.size(); std::vector tmp(Nblock,X[0]); for(int b=0;b void MulMatrix(std::vector &AP, Eigen::MatrixXcd &m , const std::vector &X){ // Should make this cache friendly with site outermost, parallel_for typedef typename Field::scalar_type scomplex; int Nblock = AP.size(); for(int b=0;b double normv(const std::vector &P){ int Nblock = P.size(); double nn = 0.0; for(int b=0;b class BlockConjugateGradient : public OperatorFunction { public: typedef typename Field::scalar_type scomplex; int blockDim ; int Nblock; BlockCGtype CGtype; bool ErrorOnNoConverge; // throw an GRID_ASSERT when the CG fails to converge. // Defaults true. RealD Tolerance; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion Integer PrintInterval; //GridLogMessages or Iterative RealD TrueResidual; BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100) {}; //////////////////////////////////////////////////////////////////////////////////////////////////// // Thin QR factorisation (google it) //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// //Dimensions // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock // // Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen) // // Q C = R => Q = R C^{-1} // // Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} // // Set C = L^{dag}, and then Q^dag Q = ident // // Checks: // Cdag C = Rdag R ; passes. // QdagQ = 1 ; passes //////////////////////////////////////////////////////////////////////////////////////////////////// void ThinQRfact (Eigen::MatrixXcd &m_rr, Eigen::MatrixXcd &C, Eigen::MatrixXcd &Cinv, Field & Q, const Field & R) { int Orthog = blockDim; // First dimension is block dim; this is an assumption sliceInnerProductMatrix(m_rr,R,R,Orthog); // Force manifest hermitian to avoid rounding related /* int rank=m_rr.rows(); for(int r=0;r & Q, const std::vector & R) { InnerProductMatrix(m_rr,R,R); /* int rank=m_rr.rows(); for(int r=0;r &Linop, const Field &Src, Field &Psi) { if ( CGtype == BlockCGrQ ) { BlockCGrQsolve(Linop,Src,Psi); } else if (CGtype == CGmultiRHS ) { CGmultiRHSsolve(Linop,Src,Psi); } else { GRID_ASSERT(0); } } virtual void operator()(LinearOperatorBase &Linop, const std::vector &Src, std::vector &Psi) { if ( CGtype == BlockCGrQVec ) { BlockCGrQsolveVec(Linop,Src,Psi); } else { GRID_ASSERT(0); } } //////////////////////////////////////////////////////////////////////////// // BlockCGrQ implementation: //-------------------------- // X is guess/Solution // B is RHS // Solve A X_i = B_i ; i refers to Nblock index //////////////////////////////////////////////////////////////////////////// void BlockCGrQsolve(LinearOperatorBase &Linop, const Field &B, Field &X) { int Orthog = blockDim; // First dimension is block dim; this is an assumption Nblock = B.Grid()->_fdimensions[Orthog]; /* FAKE */ Nblock=8; std::cout< residuals(Nblock); std::vector ssq(Nblock); sliceNorm(ssq,B,Orthog); RealD sssum=0; for(int b=0;b Thin QR factorisation (google it) * for k: * Z = AD * M = [D^dag Z]^{-1} * X = X + D MC * QS = Q - ZM * D = Q + D S^dag * C = S C */ /////////////////////////////////////// // Initial block: initial search dir is guess /////////////////////////////////////// std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " < Thin QR factorisation (google it) Linop.HermOp(X, AD); tmp = B - AD; sliceNorm(residuals,tmp,Orthog); for(int b=0;b max_resid ) max_resid = rr; } std::cout << GridLogIterative << "\titeration "< &Linop, const Field &Src, Field &Psi) { int Orthog = blockDim; // First dimension is block dim Nblock = Src.Grid()->_fdimensions[Orthog]; std::cout< v_pAp(Nblock); std::vector v_rr (Nblock); std::vector v_rr_inv(Nblock); std::vector v_alpha(Nblock); std::vector v_beta(Nblock); // Initial residual computation & set up std::vector residuals(Nblock); std::vector ssq(Nblock); sliceNorm(ssq,Src,Orthog); RealD sssum=0; for(int b=0;b max_resid ) max_resid = rr; } if ( max_resid < Tolerance*Tolerance ) { SolverTimer.Stop(); std::cout << GridLogMessage<<"MultiRHS solver converged in " < &Linop, const std::vector &B, std::vector &X) { Nblock = B.size(); GRID_ASSERT(Nblock == X.size()); std::cout< tmp(Nblock,Fake); std::vector Q(Nblock,Fake); std::vector D(Nblock,Fake); std::vector Z(Nblock,Fake); std::vector AD(Nblock,Fake); Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock); Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock); Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock); Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock); Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock); Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock); Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock); Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock); Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock); // Initial residual computation & set up std::vector residuals(Nblock); std::vector ssq(Nblock); RealD sssum=0; for(int b=0;b Thin QR factorisation (google it) * for k: * Z = AD * M = [D^dag Z]^{-1} * X = X + D MC * QS = Q - ZM * D = Q + D S^dag * C = S C */ /////////////////////////////////////// // Initial block: initial search dir is guess /////////////////////////////////////// std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " < Thin QR factorisation (google it) for(int b=0;b max_resid ) max_resid = rr; } std::cout << GridLogIterative << "\t Block Iteration "<