mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
338 lines
11 KiB
C++
338 lines
11 KiB
C++
/*************************************************************************************
|
|
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
|
|
|
|
Copyright (C) 2017
|
|
|
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution
|
|
directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
|
|
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
|
|
|
|
#include <Grid/Eigen/Dense>
|
|
|
|
namespace Grid {
|
|
|
|
GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
|
{
|
|
int NN = BlockSolverGrid->_ndimension;
|
|
int nsimd = BlockSolverGrid->Nsimd();
|
|
|
|
std::vector<int> latt_phys(0);
|
|
std::vector<int> simd_phys(0);
|
|
std::vector<int> mpi_phys(0);
|
|
|
|
for(int d=0;d<NN;d++){
|
|
if( d!=Orthog ) {
|
|
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
|
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
|
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
|
}
|
|
}
|
|
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
|
}
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Need to move sliceInnerProduct, sliceAxpy, sliceNorm etc... into lattice sector along with sliceSum
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
template<class vobj>
|
|
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
|
{
|
|
typedef typename vobj::scalar_object sobj;
|
|
typedef typename vobj::scalar_type scalar_type;
|
|
typedef typename vobj::vector_type vector_type;
|
|
|
|
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
|
|
|
GridBase *FullGrid = X._grid;
|
|
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
|
|
|
Lattice<vobj> Xslice(SliceGrid);
|
|
Lattice<vobj> Rslice(SliceGrid);
|
|
// If we based this on Cshift it would work for spread out
|
|
// but it would be even slower
|
|
for(int i=0;i<Nblock;i++){
|
|
ExtractSlice(Rslice,Y,i,Orthog);
|
|
for(int j=0;j<Nblock;j++){
|
|
ExtractSlice(Xslice,X,j,Orthog);
|
|
Rslice = Rslice + Xslice*(scale*aa(j,i));
|
|
}
|
|
InsertSlice(Rslice,R,i,Orthog);
|
|
}
|
|
};
|
|
template<class vobj>
|
|
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
|
{
|
|
typedef typename vobj::scalar_object sobj;
|
|
typedef typename vobj::scalar_type scalar_type;
|
|
typedef typename vobj::vector_type vector_type;
|
|
|
|
GridBase *FullGrid = lhs._grid;
|
|
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
|
|
|
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
|
|
|
Lattice<vobj> Lslice(SliceGrid);
|
|
Lattice<vobj> Rslice(SliceGrid);
|
|
|
|
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
|
|
for(int i=0;i<Nblock;i++){
|
|
ExtractSlice(Lslice,lhs,i,Orthog);
|
|
for(int j=0;j<Nblock;j++){
|
|
ExtractSlice(Rslice,rhs,j,Orthog);
|
|
mat(i,j) = innerProduct(Lslice,Rslice);
|
|
}
|
|
}
|
|
#undef FORCE_DIAG
|
|
#ifdef FORCE_DIAG
|
|
for(int i=0;i<Nblock;i++){
|
|
for(int j=0;j<Nblock;j++){
|
|
if ( i != j ) mat(i,j)=0.0;
|
|
}
|
|
}
|
|
#endif
|
|
return;
|
|
}
|
|
template<class vobj>
|
|
static void sliceInnerProductVector( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
|
{
|
|
typedef typename vobj::scalar_object sobj;
|
|
typedef typename vobj::scalar_type scalar_type;
|
|
typedef typename vobj::vector_type vector_type;
|
|
typedef typename vobj::tensor_reduced scalar;
|
|
typedef typename scalar::scalar_object scomplex;
|
|
|
|
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
|
|
|
|
vec.resize(Nblock);
|
|
std::vector<scomplex> sip(Nblock);
|
|
Lattice<scalar> IP(lhs._grid);
|
|
|
|
IP=localInnerProduct(lhs,rhs);
|
|
sliceSum(IP,sip,Orthog);
|
|
|
|
for(int ss=0;ss<Nblock;ss++){
|
|
vec[ss] = TensorRemove(sip[ss]);
|
|
}
|
|
}
|
|
template<class vobj>
|
|
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog) {
|
|
|
|
typedef typename vobj::scalar_object sobj;
|
|
typedef typename vobj::scalar_type scalar_type;
|
|
typedef typename vobj::vector_type vector_type;
|
|
|
|
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
|
|
std::vector<ComplexD> ip(Nblock);
|
|
sn.resize(Nblock);
|
|
|
|
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
|
for(int ss=0;ss<Nblock;ss++){
|
|
sn[ss] = real(ip[ss]);
|
|
}
|
|
};
|
|
/*
|
|
template<class vobj>
|
|
static void sliceInnerProductMatrixOld( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
|
{
|
|
typedef typename vobj::scalar_object sobj;
|
|
typedef typename vobj::scalar_type scalar_type;
|
|
typedef typename vobj::vector_type vector_type;
|
|
typedef typename vobj::tensor_reduced scalar;
|
|
typedef typename scalar::scalar_object scomplex;
|
|
|
|
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
|
|
|
|
std::cout << " sliceInnerProductMatrix Dim "<<Orthog<<" Nblock " << Nblock<<std::endl;
|
|
|
|
Lattice<scalar> IP(lhs._grid);
|
|
std::vector<scomplex> sip(Nblock);
|
|
|
|
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
|
|
Lattice<vobj> tmp = rhs;
|
|
|
|
for(int s1=0;s1<Nblock;s1++){
|
|
|
|
IP=localInnerProduct(lhs,tmp);
|
|
sliceSum(IP,sip,Orthog);
|
|
|
|
std::cout << "InnerProductMatrix ["<<s1<<"] = ";
|
|
for(int ss=0;ss<Nblock;ss++){
|
|
std::cout << TensorRemove(sip[ss])<<" ";
|
|
}
|
|
std::cout << std::endl;
|
|
|
|
for(int ss=0;ss<Nblock;ss++){
|
|
mat(ss,(s1+ss)%Nblock) = TensorRemove(sip[ss]);
|
|
}
|
|
if ( s1!=(Nblock-1) ) {
|
|
tmp = Cshift(tmp,Orthog,1);
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
// Block conjugate gradient. Dimension zero should be the block direction
|
|
//////////////////////////////////////////////////////////////////////////
|
|
template <class Field>
|
|
class BlockConjugateGradient : public OperatorFunction<Field> {
|
|
public:
|
|
|
|
typedef typename Field::scalar_type scomplex;
|
|
|
|
const int blockDim = 0;
|
|
|
|
int Nblock;
|
|
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
|
// Defaults true.
|
|
RealD Tolerance;
|
|
Integer MaxIterations;
|
|
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
|
|
|
BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
|
: Tolerance(tol),
|
|
MaxIterations(maxit),
|
|
ErrorOnNoConverge(err_on_no_conv){};
|
|
|
|
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
|
{
|
|
int Orthog = 0; // First dimension is block dim
|
|
Nblock = Src._grid->_fdimensions[Orthog];
|
|
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<std::endl;
|
|
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Nblock "<<Nblock<<std::endl;
|
|
|
|
Psi.checkerboard = Src.checkerboard;
|
|
conformable(Psi, Src);
|
|
|
|
Field P(Src);
|
|
Field AP(Src);
|
|
Field R(Src);
|
|
|
|
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
|
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
|
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
|
|
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
|
|
// Initial residual computation & set up
|
|
std::vector<RealD> residuals(Nblock);
|
|
std::vector<RealD> ssq(Nblock);
|
|
|
|
sliceNorm(ssq,Src,Orthog);
|
|
RealD sssum=0;
|
|
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
|
|
|
sliceNorm(residuals,Src,Orthog);
|
|
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
|
|
|
sliceNorm(residuals,Psi,Orthog);
|
|
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
|
|
|
// Initial search dir is guess
|
|
Linop.HermOp(Psi, AP);
|
|
|
|
|
|
/************************************************************************
|
|
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
|
|
************************************************************************
|
|
* O'Leary : R = B - A X
|
|
* O'Leary : P = M R ; preconditioner M = 1
|
|
* O'Leary : alpha = PAP^{-1} RMR
|
|
* O'Leary : beta = RMR^{-1}_old RMR_new
|
|
* O'Leary : X=X+Palpha
|
|
* O'Leary : R_new=R_old-AP alpha
|
|
* O'Leary : P=MR_new+P beta
|
|
*/
|
|
|
|
R = Src - AP;
|
|
P = R;
|
|
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
|
|
|
int k;
|
|
for (k = 1; k <= MaxIterations; k++) {
|
|
|
|
RealD rrsum=0;
|
|
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
|
|
|
|
std::cout << GridLogIterative << " iteration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
|
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
|
|
|
Linop.HermOp(P, AP);
|
|
|
|
// Alpha
|
|
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
|
|
m_pAp_inv = m_pAp.inverse();
|
|
m_alpha = m_pAp_inv * m_rr ;
|
|
|
|
// Psi, R update
|
|
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
|
|
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
|
|
|
// Beta
|
|
m_rr_inv = m_rr.inverse();
|
|
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
|
m_beta = m_rr_inv *m_rr;
|
|
|
|
// Search update
|
|
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
|
|
P= AP;
|
|
|
|
/*********************
|
|
* convergence monitor
|
|
*********************
|
|
*/
|
|
RealD max_resid=0;
|
|
for(int b=0;b<Nblock;b++){
|
|
RealD rr = real(m_rr(b,b))/ssq[b];
|
|
if ( rr > max_resid ) max_resid = rr;
|
|
}
|
|
|
|
if ( max_resid < Tolerance*Tolerance ) {
|
|
std::cout << GridLogMessage<<" Block solver has converged in "
|
|
<<k<<" iterations; max residual is "<<std::sqrt(max_resid)<<std::endl;
|
|
for(int b=0;b<Nblock;b++){
|
|
std::cout << GridLogMessage<< " block "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
|
}
|
|
|
|
Linop.HermOp(Psi, AP);
|
|
AP = AP-Src;
|
|
std::cout << " Block solver true residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
|
IterationsToComplete = k;
|
|
return;
|
|
}
|
|
|
|
}
|
|
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
|
|
|
|
if (ErrorOnNoConverge) assert(0);
|
|
IterationsToComplete = k;
|
|
}
|
|
};
|
|
}
|
|
#endif
|