mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 15:27:06 +01:00
Merge branch 'develop' into feature/gpu-port
This commit is contained in:
61
Grid/algorithms/Algorithms.h
Normal file
61
Grid/algorithms/Algorithms.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Algorithms.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHMS_H
|
||||
#define GRID_ALGORITHMS_H
|
||||
|
||||
#include <Grid/algorithms/SparseMatrix.h>
|
||||
#include <Grid/algorithms/LinearOperator.h>
|
||||
#include <Grid/algorithms/Preconditioner.h>
|
||||
|
||||
#include <Grid/algorithms/approx/Zolotarev.h>
|
||||
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||
#include <Grid/algorithms/approx/Remez.h>
|
||||
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||
#include <Grid/algorithms/approx/Forecast.h>
|
||||
|
||||
#include <Grid/algorithms/iterative/Deflation.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>
|
||||
#include <Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h>
|
||||
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||
#include <Grid/algorithms/FFT.h>
|
||||
|
||||
// EigCg
|
||||
// Pcg
|
||||
// Hdcg
|
||||
// GCR
|
||||
// etc..
|
||||
|
||||
#endif
|
486
Grid/algorithms/CoarsenedMatrix.h
Normal file
486
Grid/algorithms/CoarsenedMatrix.h
Normal file
@ -0,0 +1,486 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/CoarsenedMatrix.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
class Geometry {
|
||||
// int dimension;
|
||||
public:
|
||||
int npoint;
|
||||
std::vector<int> directions ;
|
||||
std::vector<int> displacements;
|
||||
|
||||
Geometry(int _d) {
|
||||
|
||||
int base = (_d==5) ? 1:0;
|
||||
|
||||
// make coarse grid stencil for 4d , not 5d
|
||||
if ( _d==5 ) _d=4;
|
||||
|
||||
npoint = 2*_d+1;
|
||||
directions.resize(npoint);
|
||||
displacements.resize(npoint);
|
||||
for(int d=0;d<_d;d++){
|
||||
directions[2*d ] = d+base;
|
||||
directions[2*d+1] = d+base;
|
||||
displacements[2*d ] = +1;
|
||||
displacements[2*d+1] = -1;
|
||||
}
|
||||
directions [2*_d]=0;
|
||||
displacements[2*_d]=0;
|
||||
|
||||
//// report back
|
||||
std::cout<<GridLogMessage<<"directions :";
|
||||
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
|
||||
std::cout <<std::endl;
|
||||
std::cout<<GridLogMessage<<"displacements :";
|
||||
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
|
||||
/*
|
||||
// Original cleaner code
|
||||
Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) {
|
||||
for(int d=0;d<dimension;d++){
|
||||
directions[2*d ] = d;
|
||||
directions[2*d+1] = d;
|
||||
displacements[2*d ] = +1;
|
||||
displacements[2*d+1] = -1;
|
||||
}
|
||||
directions [2*dimension]=0;
|
||||
displacements[2*dimension]=0;
|
||||
}
|
||||
std::vector<int> GetDelta(int point) {
|
||||
std::vector<int> delta(dimension,0);
|
||||
delta[directions[point]] = displacements[point];
|
||||
return delta;
|
||||
};
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class Aggregation {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
typedef Lattice<siteVector> CoarseVector;
|
||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
|
||||
GridBase *CoarseGrid;
|
||||
GridBase *FineGrid;
|
||||
std::vector<Lattice<Fobj> > subspace;
|
||||
int checkerboard;
|
||||
int Checkerboard(void){return checkerboard;}
|
||||
Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :
|
||||
CoarseGrid(_CoarseGrid),
|
||||
FineGrid(_FineGrid),
|
||||
subspace(nbasis,_FineGrid),
|
||||
checkerboard(_checkerboard)
|
||||
{
|
||||
};
|
||||
|
||||
void Orthogonalise(void){
|
||||
CoarseScalar InnerProd(CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
|
||||
// CheckOrthogonal();
|
||||
}
|
||||
void CheckOrthogonal(void){
|
||||
CoarseVector iProj(CoarseGrid);
|
||||
CoarseVector eProj(CoarseGrid);
|
||||
for(int i=0;i<nbasis;i++){
|
||||
blockProject(iProj,subspace[i],subspace);
|
||||
eProj=Zero();
|
||||
thread_loop( (int ss=0;ss<CoarseGrid->oSites();ss++),{
|
||||
eProj[ss](i)=CComplex(1.0);
|
||||
});
|
||||
eProj=eProj - iProj;
|
||||
std::cout<<GridLogMessage<<"Orthog check error "<<i<<" " << norm2(eProj)<<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage <<"CheckOrthog done"<<std::endl;
|
||||
}
|
||||
void ProjectToSubspace(CoarseVector &CoarseVec,const FineField &FineVec){
|
||||
blockProject(CoarseVec,FineVec,subspace);
|
||||
}
|
||||
void PromoteFromSubspace(const CoarseVector &CoarseVec,FineField &FineVec){
|
||||
FineVec.Checkerboard() = subspace[0].Checkerboard();
|
||||
blockPromote(CoarseVec,FineVec,subspace);
|
||||
}
|
||||
void CreateSubspaceRandom(GridParallelRNG &RNG){
|
||||
for(int i=0;i<nbasis;i++){
|
||||
random(RNG,subspace[i]);
|
||||
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
|
||||
}
|
||||
Orthogonalise();
|
||||
}
|
||||
|
||||
/*
|
||||
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
|
||||
{
|
||||
// Run a Lanczos with sloppy convergence
|
||||
const int Nstop = nn;
|
||||
const int Nk = nn+20;
|
||||
const int Np = nn+20;
|
||||
const int Nm = Nk+Np;
|
||||
const int MaxIt= 10000;
|
||||
RealD resid = 1.0e-3;
|
||||
|
||||
Chebyshev<FineField> Cheb(0.5,64.0,21);
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
|
||||
// IRL.lock = 1;
|
||||
|
||||
FineField noise(FineGrid); gaussian(RNG,noise);
|
||||
FineField tmp(FineGrid);
|
||||
std::vector<RealD> eval(Nm);
|
||||
std::vector<FineField> evec(Nm,FineGrid);
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(eval,evec,
|
||||
noise,
|
||||
Nconv);
|
||||
|
||||
// pull back nn vectors
|
||||
for(int b=0;b<nn;b++){
|
||||
|
||||
subspace[b] = evec[b];
|
||||
|
||||
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
|
||||
|
||||
hermop.Op(subspace[b],tmp);
|
||||
std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
|
||||
|
||||
noise = tmp - sqrt(eval[b])*subspace[b] ;
|
||||
|
||||
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
|
||||
|
||||
noise = tmp + eval[b]*subspace[b] ;
|
||||
|
||||
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
|
||||
|
||||
}
|
||||
Orthogonalise();
|
||||
for(int b=0;b<nn;b++){
|
||||
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
|
||||
}
|
||||
}
|
||||
*/
|
||||
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
|
||||
|
||||
RealD scale;
|
||||
|
||||
ConjugateGradient<FineField> CG(1.0e-2,10000);
|
||||
FineField noise(FineGrid);
|
||||
FineField Mn(FineGrid);
|
||||
|
||||
for(int b=0;b<nn;b++){
|
||||
|
||||
gaussian(RNG,noise);
|
||||
scale = std::pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
|
||||
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
|
||||
|
||||
for(int i=0;i<1;i++){
|
||||
|
||||
CG(hermop,noise,subspace[b]);
|
||||
|
||||
noise = subspace[b];
|
||||
scale = std::pow(norm2(noise),-0.5);
|
||||
noise=noise*scale;
|
||||
|
||||
}
|
||||
|
||||
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(Mn)<<std::endl;
|
||||
subspace[b] = noise;
|
||||
|
||||
}
|
||||
|
||||
Orthogonalise();
|
||||
|
||||
}
|
||||
};
|
||||
// Fine Object == (per site) type of fine field
|
||||
// nbasis == number of deflation vectors
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
|
||||
typedef iVector<CComplex,nbasis > siteVector;
|
||||
typedef Lattice<siteVector> CoarseVector;
|
||||
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
|
||||
|
||||
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj > FineField;
|
||||
|
||||
////////////////////
|
||||
// Data members
|
||||
////////////////////
|
||||
Geometry geom;
|
||||
GridBase * _grid;
|
||||
CartesianStencil<siteVector,siteVector> Stencil;
|
||||
|
||||
std::vector<CoarseMatrix> A;
|
||||
|
||||
|
||||
///////////////////////
|
||||
// Interface
|
||||
///////////////////////
|
||||
GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know
|
||||
|
||||
RealD M (const CoarseVector &in, CoarseVector &out){
|
||||
|
||||
conformable(_grid,in.Grid());
|
||||
conformable(in.Grid(),out.Grid());
|
||||
|
||||
SimpleCompressor<siteVector> compressor;
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
auto in_v = in.View();
|
||||
auto out_v = in.View();
|
||||
thread_loop( (int ss=0;ss<Grid()->oSites();ss++),{
|
||||
siteVector res = Zero();
|
||||
siteVector nbr;
|
||||
int ptype;
|
||||
StencilEntry *SE;
|
||||
for(int point=0;point<geom.npoint;point++){
|
||||
|
||||
SE=Stencil.GetEntry(ptype,point,ss);
|
||||
|
||||
if(SE->_is_local&&SE->_permute) {
|
||||
permute(nbr,in_v[SE->_offset],ptype);
|
||||
} else if(SE->_is_local) {
|
||||
nbr = in_v[SE->_offset];
|
||||
} else {
|
||||
nbr = Stencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
auto A_point = A[point].View();
|
||||
res = res + A_point[ss]*nbr;
|
||||
}
|
||||
vstream(out_v[ss],res);
|
||||
});
|
||||
return norm2(out);
|
||||
};
|
||||
|
||||
RealD Mdag (const CoarseVector &in, CoarseVector &out){
|
||||
return M(in,out);
|
||||
};
|
||||
|
||||
// Defer support for further coarsening for now
|
||||
void Mdiag (const CoarseVector &in, CoarseVector &out){};
|
||||
void Mdir (const CoarseVector &in, CoarseVector &out,int dir, int disp){};
|
||||
|
||||
CoarsenedMatrix(GridCartesian &CoarseGrid) :
|
||||
|
||||
_grid(&CoarseGrid),
|
||||
geom(CoarseGrid._ndimension),
|
||||
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements),
|
||||
A(geom.npoint,&CoarseGrid)
|
||||
{
|
||||
};
|
||||
|
||||
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
|
||||
Aggregation<Fobj,CComplex,nbasis> & Subspace){
|
||||
|
||||
FineField iblock(FineGrid); // contributions from within this block
|
||||
FineField oblock(FineGrid); // contributions from outwith this block
|
||||
|
||||
FineField phi(FineGrid);
|
||||
FineField tmp(FineGrid);
|
||||
FineField zz(FineGrid); zz=Zero();
|
||||
FineField Mphi(FineGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(FineGrid);
|
||||
|
||||
CoarseVector iProj(Grid());
|
||||
CoarseVector oProj(Grid());
|
||||
CoarseScalar InnerProd(Grid());
|
||||
|
||||
// Orthogonalise the subblocks over the basis
|
||||
blockOrthogonalise(InnerProd,Subspace.subspace);
|
||||
|
||||
// Compute the matrix elements of linop between this orthonormal
|
||||
// set of vectors.
|
||||
int self_stencil=-1;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
A[p]=Zero();
|
||||
if( geom.displacements[p]==0){
|
||||
self_stencil=p;
|
||||
}
|
||||
}
|
||||
assert(self_stencil!=-1);
|
||||
|
||||
for(int i=0;i<nbasis;i++){
|
||||
phi=Subspace.subspace[i];
|
||||
|
||||
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl;
|
||||
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
|
||||
int dir = geom.directions[p];
|
||||
int disp = geom.displacements[p];
|
||||
|
||||
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
|
||||
|
||||
LatticeCoordinate(coor,dir);
|
||||
|
||||
if ( disp==0 ){
|
||||
linop.OpDiag(phi,Mphi);
|
||||
}
|
||||
else {
|
||||
linop.OpDir(phi,Mphi,dir,disp);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Pick out contributions coming from this cell and neighbour cell
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
if ( disp==0 ) {
|
||||
iblock = Mphi;
|
||||
oblock = Zero();
|
||||
} else if ( disp==1 ) {
|
||||
oblock = where(mod(coor,block)==(block-1),Mphi,zz);
|
||||
iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
|
||||
} else if ( disp==-1 ) {
|
||||
oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
|
||||
iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
Subspace.ProjectToSubspace(iProj,iblock);
|
||||
Subspace.ProjectToSubspace(oProj,oblock);
|
||||
// blockProject(iProj,iblock,Subspace.subspace);
|
||||
// blockProject(oProj,oblock,Subspace.subspace);
|
||||
auto iProj_v = iProj.View() ;
|
||||
auto oProj_v = oProj.View() ;
|
||||
auto A_p = A[p].View();
|
||||
auto A_self = A[self_stencil].View();
|
||||
thread_loop( (int ss=0;ss<Grid()->oSites();ss++),{
|
||||
for(int j=0;j<nbasis;j++){
|
||||
if( disp!= 0 ) {
|
||||
A_p[ss](j,i) = oProj_v[ss](j);
|
||||
}
|
||||
A_self[ss](j,i) = A_self[ss](j,i) + iProj_v[ss](j);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
///////////////////////////
|
||||
// test code worth preserving in if block
|
||||
///////////////////////////
|
||||
std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
|
||||
for(int p=0;p<geom.npoint;p++){
|
||||
std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
|
||||
std::cout<<GridLogMessage<< A[p] << std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
|
||||
|
||||
phi=Subspace.subspace[0];
|
||||
std::vector<int> bc(FineGrid->_ndimension,0);
|
||||
|
||||
blockPick(Grid(),phi,tmp,bc); // Pick out a block
|
||||
linop.Op(tmp,Mphi); // Apply big dop
|
||||
blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
|
||||
std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
|
||||
std::cout<<GridLogMessage<< iProj <<std::endl;
|
||||
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
|
||||
#endif
|
||||
// ForceHermitian();
|
||||
AssertHermitian();
|
||||
// ForceDiagonal();
|
||||
}
|
||||
void ForceDiagonal(void) {
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"**** Forcing coarse operator to be diagonal ****"<<std::endl;
|
||||
std::cout<<GridLogMessage<<"**************************************************"<<std::endl;
|
||||
for(int p=0;p<8;p++){
|
||||
A[p]=Zero();
|
||||
}
|
||||
|
||||
GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
|
||||
Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
|
||||
|
||||
Complex one(1.0);
|
||||
|
||||
iMatrix<CComplex,nbasis> ident; ident=one;
|
||||
|
||||
val = val*adj(val);
|
||||
val = val + 1.0;
|
||||
|
||||
A[8] = val*ident;
|
||||
|
||||
// for(int s=0;s<Grid()->oSites();s++) {
|
||||
// A[8][s]=val[s];
|
||||
// }
|
||||
}
|
||||
void ForceHermitian(void) {
|
||||
for(int d=0;d<4;d++){
|
||||
int dd=d+1;
|
||||
A[2*d] = adj(Cshift(A[2*d+1],dd,1));
|
||||
}
|
||||
// A[8] = 0.5*(A[8] + adj(A[8]));
|
||||
}
|
||||
void AssertHermitian(void) {
|
||||
CoarseMatrix AA (Grid());
|
||||
CoarseMatrix AAc (Grid());
|
||||
CoarseMatrix Diff (Grid());
|
||||
for(int d=0;d<4;d++){
|
||||
|
||||
int dd=d+1;
|
||||
AAc = Cshift(A[2*d+1],dd,1);
|
||||
AA = A[2*d];
|
||||
|
||||
Diff = AA - adj(AAc);
|
||||
|
||||
std::cout<<GridLogMessage<<"Norm diff dim "<<d<<" "<< norm2(Diff)<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Norm dim "<<d<<" "<< norm2(AA)<<std::endl;
|
||||
|
||||
}
|
||||
Diff = A[8] - adj(A[8]);
|
||||
std::cout<<GridLogMessage<<"Norm diff local "<< norm2(Diff)<<std::endl;
|
||||
std::cout<<GridLogMessage<<"Norm local "<< norm2(A[8])<<std::endl;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
291
Grid/algorithms/FFT.h
Normal file
291
Grid/algorithms/FFT.h
Normal file
@ -0,0 +1,291 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Cshift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef _GRID_FFT_H_
|
||||
#define _GRID_FFT_H_
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
#ifdef USE_MKL
|
||||
#include <fftw/fftw3.h>
|
||||
#else
|
||||
#include <fftw3.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class scalar> struct FFTW { };
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
template<> struct FFTW<ComplexD> {
|
||||
public:
|
||||
|
||||
typedef fftw_complex FFTW_scalar;
|
||||
typedef fftw_plan FFTW_plan;
|
||||
|
||||
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||
FFTW_scalar *in, const int *inembed,
|
||||
int istride, int idist,
|
||||
FFTW_scalar *out, const int *onembed,
|
||||
int ostride, int odist,
|
||||
int sign, unsigned flags) {
|
||||
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||
}
|
||||
|
||||
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||
::fftw_flops(p,add,mul,fmas);
|
||||
}
|
||||
|
||||
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||
::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||
::fftw_destroy_plan(p);
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct FFTW<ComplexF> {
|
||||
public:
|
||||
|
||||
typedef fftwf_complex FFTW_scalar;
|
||||
typedef fftwf_plan FFTW_plan;
|
||||
|
||||
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||
FFTW_scalar *in, const int *inembed,
|
||||
int istride, int idist,
|
||||
FFTW_scalar *out, const int *onembed,
|
||||
int ostride, int odist,
|
||||
int sign, unsigned flags) {
|
||||
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||
}
|
||||
|
||||
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||
::fftwf_flops(p,add,mul,fmas);
|
||||
}
|
||||
|
||||
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||
::fftwf_execute_dft(p,in,out);
|
||||
}
|
||||
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||
::fftwf_destroy_plan(p);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef FFTW_FORWARD
|
||||
#define FFTW_FORWARD (-1)
|
||||
#define FFTW_BACKWARD (+1)
|
||||
#endif
|
||||
|
||||
class FFT {
|
||||
private:
|
||||
|
||||
GridCartesian *vgrid;
|
||||
GridCartesian *sgrid;
|
||||
|
||||
int Nd;
|
||||
double flops;
|
||||
double flops_call;
|
||||
uint64_t usec;
|
||||
|
||||
Coordinate dimensions;
|
||||
Coordinate processors;
|
||||
Coordinate processor_coor;
|
||||
|
||||
public:
|
||||
|
||||
static const int forward=FFTW_FORWARD;
|
||||
static const int backward=FFTW_BACKWARD;
|
||||
|
||||
double Flops(void) {return flops;}
|
||||
double MFlops(void) {return flops/usec;}
|
||||
double USec(void) {return (double)usec;}
|
||||
|
||||
FFT ( GridCartesian * grid ) :
|
||||
vgrid(grid),
|
||||
Nd(grid->_ndimension),
|
||||
dimensions(grid->_fdimensions),
|
||||
processors(grid->_processors),
|
||||
processor_coor(grid->_processor_coor)
|
||||
{
|
||||
flops=0;
|
||||
usec =0;
|
||||
Coordinate layout(Nd,1);
|
||||
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||
};
|
||||
|
||||
~FFT ( void) {
|
||||
delete sgrid;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim_mask(Lattice<vobj> &result,const Lattice<vobj> &source,Coordinate mask,int sign){
|
||||
|
||||
conformable(result.Grid(),vgrid);
|
||||
conformable(source.Grid(),vgrid);
|
||||
Lattice<vobj> tmp(vgrid);
|
||||
tmp = source;
|
||||
for(int d=0;d<Nd;d++){
|
||||
if( mask[d] ) {
|
||||
FFT_dim(result,tmp,d,sign);
|
||||
tmp=result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
void FFT_all_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int sign){
|
||||
Coordinate mask(Nd,1);
|
||||
FFT_dim_mask(result,source,mask,sign);
|
||||
}
|
||||
|
||||
|
||||
template<class vobj>
|
||||
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int sign){
|
||||
#ifndef HAVE_FFTW
|
||||
assert(0);
|
||||
#else
|
||||
conformable(result.Grid(),vgrid);
|
||||
conformable(source.Grid(),vgrid);
|
||||
|
||||
int L = vgrid->_ldimensions[dim];
|
||||
int G = vgrid->_fdimensions[dim];
|
||||
|
||||
Coordinate layout(Nd,1);
|
||||
Coordinate pencil_gd(vgrid->_fdimensions);
|
||||
|
||||
pencil_gd[dim] = G*processors[dim];
|
||||
|
||||
// Pencil global vol LxLxGxLxL per node
|
||||
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||
|
||||
// Construct pencils
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename sobj::scalar_type scalar;
|
||||
|
||||
Lattice<sobj> pgbuf(&pencil_g);
|
||||
auto pgbuf_v = pgbuf.View();
|
||||
|
||||
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||
|
||||
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||
int Nlow = 1;
|
||||
for(int d=0;d<dim;d++){
|
||||
Nlow*=vgrid->_ldimensions[d];
|
||||
}
|
||||
|
||||
int rank = 1; /* 1d transforms */
|
||||
int n[] = {G}; /* 1d transforms of length G */
|
||||
int howmany = Ncomp;
|
||||
int odist,idist,istride,ostride;
|
||||
idist = odist = 1; /* Distance between consecutive FT's */
|
||||
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||
int *inembed = n, *onembed = n;
|
||||
|
||||
scalar div;
|
||||
if ( sign == backward ) div = 1.0/G;
|
||||
else if ( sign == forward ) div = 1.0;
|
||||
else assert(0);
|
||||
|
||||
FFTW_plan p;
|
||||
{
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[0];
|
||||
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||
in,inembed,
|
||||
istride,idist,
|
||||
out,onembed,
|
||||
ostride, odist,
|
||||
sign,FFTW_ESTIMATE);
|
||||
}
|
||||
|
||||
// Barrel shift and collect global pencil
|
||||
Coordinate lcoor(Nd), gcoor(Nd);
|
||||
result = source;
|
||||
int pc = processor_coor[dim];
|
||||
for(int p=0;p<processors[dim];p++) {
|
||||
thread_loop( (int idx=0;idx<sgrid->lSites();idx++), {
|
||||
Coordinate cbuf(Nd);
|
||||
sobj s;
|
||||
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||
peekLocalSite(s,result,cbuf);
|
||||
cbuf[dim]+=((pc+p) % processors[dim])*L;
|
||||
// cbuf[dim]+=p*L;
|
||||
pokeLocalSite(s,pgbuf,cbuf);
|
||||
});
|
||||
if (p != processors[dim] - 1) {
|
||||
result = Cshift(result,dim,L);
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over orthog coords
|
||||
int NN=pencil_g.lSites();
|
||||
GridStopWatch timer;
|
||||
timer.Start();
|
||||
thread_loop( (int idx=0;idx<NN;idx++), {
|
||||
Coordinate cbuf(Nd);
|
||||
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||
FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[idx];
|
||||
FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[idx];
|
||||
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||
}
|
||||
});
|
||||
timer.Stop();
|
||||
|
||||
// performance counting
|
||||
double add,mul,fma;
|
||||
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||
flops_call = add+mul+2.0*fma;
|
||||
usec += timer.useconds();
|
||||
flops+= flops_call*NN;
|
||||
|
||||
// writing out result
|
||||
thread_loop( (int idx=0;idx<sgrid->lSites();idx++), {
|
||||
Coordinate clbuf(Nd), cgbuf(Nd);
|
||||
sobj s;
|
||||
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||
cgbuf = clbuf;
|
||||
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||
peekLocalSite(s,pgbuf,cgbuf);
|
||||
pokeLocalSite(s,result,clbuf);
|
||||
});
|
||||
result = result*div;
|
||||
|
||||
// destroying plan
|
||||
FFTW<scalar>::fftw_destroy_plan(p);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
498
Grid/algorithms/LinearOperator.h
Normal file
498
Grid/algorithms/LinearOperator.h
Normal file
@ -0,0 +1,498 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/LinearOperator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_LINEAR_OP_H
|
||||
#define GRID_ALGORITHM_LINEAR_OP_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// LinearOperators Take a something and return a something.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian Conjugateugate (transpose if real):
|
||||
//SBase
|
||||
// i) F(a x + b y) = aF(x) + b F(y).
|
||||
// ii) <x|Op|y> = <y|AdjOp|x>^\ast
|
||||
//
|
||||
// Would be fun to have a test linearity & Herm Conj function!
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class LinearOperatorBase {
|
||||
public:
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
|
||||
|
||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
|
||||
virtual void HermOp(const Field &in, Field &out)=0;
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code
|
||||
// between RB and non-RB variants. Sparse matrix is like the fermion action def, and then
|
||||
// the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising
|
||||
// replication of code.
|
||||
//
|
||||
// I'm not entirely happy with implementation; to share the Schur code between herm and non-herm
|
||||
// while still having a "OpAndNorm" in the abstract base I had to implement it in both cases
|
||||
// with an assert trap in the non-herm. This isn't right; there must be a better C++ way to
|
||||
// do it, but I fear it required multiple inheritance and mixed in abstract base classes
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Construct herm op from non-herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class MdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Construct herm op and shift it for mgrid smoother
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
RealD _shift;
|
||||
public:
|
||||
ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
assert(0);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.Mdag(in,out);
|
||||
assert(0);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.MdagM(in,out,n1,n2);
|
||||
out = out + _shift*in;
|
||||
|
||||
ComplexD dot;
|
||||
dot= innerProduct(in,out);
|
||||
n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Wrap an already herm matrix
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class HermitianLinearOperator : public LinearOperatorBase<Field> {
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
_Mat.Mdiag(in,out);
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
_Mat.Mdir(in,out,dir,disp);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
_Mat.M(in,out);
|
||||
|
||||
ComplexD dot= innerProduct(in,out); n1=real(dot);
|
||||
n2=norm2(out);
|
||||
}
|
||||
void HermOp(const Field &in, Field &out){
|
||||
_Mat.M(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// Even Odd Schur decomp operators; there are several
|
||||
// ways to introduce the even odd checkerboarding
|
||||
//////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class SchurOperatorBase : public LinearOperatorBase<Field> {
|
||||
public:
|
||||
virtual RealD Mpc (const Field &in, Field &out) =0;
|
||||
virtual RealD MpcDag (const Field &in, Field &out) =0;
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no)
|
||||
{
|
||||
Field tmp(in.Grid());
|
||||
tmp.Checkerboard() = in.Checkerboard();
|
||||
ni=Mpc(in,tmp);
|
||||
no=MpcDag(tmp,out);
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
out.Checkerboard() = in.Checkerboard();
|
||||
MpcDagMpc(in,out,n1,n2);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
RealD n1,n2;
|
||||
HermOpAndNorm(in,out,n1,n2);
|
||||
}
|
||||
void Op (const Field &in, Field &out){
|
||||
Mpc(in,out);
|
||||
}
|
||||
void AdjOp (const Field &in, Field &out){
|
||||
MpcDag(in,out);
|
||||
}
|
||||
// Support for coarsening to a multigrid
|
||||
void OpDiag (const Field &in, Field &out) {
|
||||
assert(0); // must coarsen the unpreconditioned system
|
||||
}
|
||||
void OpDir (const Field &in, Field &out,int dir,int disp) {
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
// std::cout <<"grid pointers: in.Grid()="<< in.Grid() << " out.Grid()=" << out.Grid() << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl;
|
||||
tmp.Checkerboard() = !in.Checkerboard();
|
||||
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
//std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
_Mat.MooeeDag(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagOneOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
_Mat.Meooe(tmp,out);
|
||||
_Mat.MooeeInv(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInvDag(in,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
_Mat.MooeeInvDag(tmp,out);
|
||||
_Mat.MeooeDag(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field>
|
||||
class SchurDiagTwoOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
public:
|
||||
SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
|
||||
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MooeeInv(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
Field tmp(in.Grid());
|
||||
|
||||
_Mat.MeooeDag(in,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
_Mat.MeooeDag(tmp,out);
|
||||
_Mat.MooeeInvDag(out,tmp);
|
||||
|
||||
return axpy_norm(out,-1.0,tmp,in);
|
||||
}
|
||||
};
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta
|
||||
// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo ) Moo^-1 phi=eta ; psi = Moo^-1 phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field> using SchurDiagOneRH = SchurDiagTwoOperator<Matrix,Field> ;
|
||||
template<class Matrix,class Field> using SchurDiagOneLH = SchurDiagOneOperator<Matrix,Field> ;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Staggered use
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Matrix,class Field>
|
||||
class SchurStaggeredOperator : public SchurOperatorBase<Field> {
|
||||
protected:
|
||||
Matrix &_Mat;
|
||||
Field tmp;
|
||||
RealD mass;
|
||||
double tMpc;
|
||||
double tIP;
|
||||
double tMeo;
|
||||
double taxpby_norm;
|
||||
uint64_t ncall;
|
||||
public:
|
||||
void Report(void)
|
||||
{
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " HermOpAndNorm.IP "<< tIP /ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.MeoMoe "<< tMeo/ncall<<" usec "<<std::endl;
|
||||
std::cout << GridLogMessage << " Mpc.axpby_norm "<< taxpby_norm/ncall<<" usec "<<std::endl;
|
||||
}
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())
|
||||
{
|
||||
assert( _Mat.isTrivialEE() );
|
||||
mass = _Mat.Mass();
|
||||
tMpc=0;
|
||||
tIP =0;
|
||||
tMeo=0;
|
||||
taxpby_norm=0;
|
||||
ncall=0;
|
||||
}
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
n2 = Mpc(in,out);
|
||||
tMpc+=usecond();
|
||||
tIP-=usecond();
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
tIP+=usecond();
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
ncall++;
|
||||
tMpc-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMpc+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
axpby(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out)
|
||||
{
|
||||
|
||||
Field tmp(in.Grid());
|
||||
Field tmp2(in.Grid());
|
||||
|
||||
// std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
// std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
|
||||
tMeo-=usecond();
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
tMeo+=usecond();
|
||||
taxpby_norm-=usecond();
|
||||
RealD nn=axpby_norm(out,-1.0,mass*mass,tmp,in);
|
||||
taxpby_norm+=usecond();
|
||||
return nn;
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
return Mpc(in,out);
|
||||
}
|
||||
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
assert(0);// Never need with staggered
|
||||
}
|
||||
};
|
||||
template<class Matrix,class Field> using SchurStagOperator = SchurStaggeredOperator<Matrix,Field>;
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for functions of operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class OperatorFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) = 0;
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const std::vector<Field> &in,std::vector<Field> &out) {
|
||||
assert(in.size()==out.size());
|
||||
for(int k=0;k<in.size();k++){
|
||||
(*this)(Linop,in[k],out[k]);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template<class Field> class LinearFunction {
|
||||
public:
|
||||
virtual void operator() (const Field &in, Field &out) = 0;
|
||||
};
|
||||
|
||||
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {
|
||||
public:
|
||||
void operator() (const Field &in, Field &out){
|
||||
out = in;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for Multishift solvers for operators
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class OperatorMultiFunction {
|
||||
public:
|
||||
virtual void operator() (LinearOperatorBase<Field> &Linop, const Field &in, std::vector<Field> &out) = 0;
|
||||
};
|
||||
|
||||
// FIXME : To think about
|
||||
|
||||
// Chroma functionality list defining LinearOperator
|
||||
/*
|
||||
virtual void operator() (T& chi, const T& psi, enum PlusMinus isign) const = 0;
|
||||
virtual void operator() (T& chi, const T& psi, enum PlusMinus isign, Real epsilon) const
|
||||
virtual const Subset& subset() const = 0;
|
||||
virtual unsigned long nFlops() const { return 0; }
|
||||
virtual void deriv(P& ds_u, const T& chi, const T& psi, enum PlusMinus isign) const
|
||||
class UnprecLinearOperator : public DiffLinearOperator<T,P,Q>
|
||||
const Subset& subset() const {return all;}
|
||||
};
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Hermitian operator Linear function and operator function
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
class HermOpOperatorFunction : public OperatorFunction<Field> {
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
Linop.HermOp(in,out);
|
||||
};
|
||||
};
|
||||
|
||||
template<typename Field>
|
||||
class PlainHermOp : public LinearFunction<Field> {
|
||||
public:
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
|
||||
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
|
||||
{}
|
||||
|
||||
void operator()(const Field& in, Field& out) {
|
||||
_Linop.HermOp(in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Field>
|
||||
class FunctionHermOp : public LinearFunction<Field> {
|
||||
public:
|
||||
OperatorFunction<Field> & _poly;
|
||||
LinearOperatorBase<Field> &_Linop;
|
||||
|
||||
FunctionHermOp(OperatorFunction<Field> & poly,LinearOperatorBase<Field>& linop)
|
||||
: _poly(poly), _Linop(linop) {};
|
||||
|
||||
void operator()(const Field& in, Field& out) {
|
||||
_poly(_Linop,in,out);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class Polynomial : public OperatorFunction<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
public:
|
||||
Polynomial(std::vector<RealD> &_Coeffs) : Coeffs(_Coeffs) { };
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
Field AtoN(in.Grid());
|
||||
Field Mtmp(in.Grid());
|
||||
AtoN = in;
|
||||
out = AtoN*Coeffs[0];
|
||||
for(int n=1;n<Coeffs.size();n++){
|
||||
Mtmp = AtoN;
|
||||
Linop.HermOp(Mtmp,AtoN);
|
||||
out=out+AtoN*Coeffs[n];
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
46
Grid/algorithms/Preconditioner.h
Normal file
46
Grid/algorithms/Preconditioner.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/Preconditioner.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PRECONDITIONER_H
|
||||
#define GRID_PRECONDITIONER_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Field> class Preconditioner : public LinearFunction<Field> {
|
||||
virtual void operator()(const Field &src, Field & psi)=0;
|
||||
};
|
||||
|
||||
template<class Field> class TrivialPrecon : public Preconditioner<Field> {
|
||||
public:
|
||||
void operator()(const Field &src, Field & psi){
|
||||
psi = src;
|
||||
}
|
||||
TrivialPrecon(void){};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
79
Grid/algorithms/SparseMatrix.h
Normal file
79
Grid/algorithms/SparseMatrix.h
Normal file
@ -0,0 +1,79 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/SparseMatrix.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interface defining what I expect of a general sparse matrix, such as a Fermion action
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SparseMatrixBase {
|
||||
public:
|
||||
virtual GridBase *Grid(void) =0;
|
||||
// Full checkerboar operations
|
||||
virtual RealD M (const Field &in, Field &out)=0;
|
||||
virtual RealD Mdag (const Field &in, Field &out)=0;
|
||||
virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) {
|
||||
Field tmp (in.Grid());
|
||||
ni=M(in,tmp);
|
||||
no=Mdag(tmp,out);
|
||||
}
|
||||
virtual void Mdiag (const Field &in, Field &out)=0;
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Interface augmented by a red black sparse matrix, such as a Fermion action
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrixBase<Field> {
|
||||
public:
|
||||
virtual GridBase *RedBlackGrid(void)=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Query the even even properties to make algorithmic decisions
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
virtual RealD Mass(void) { return 0.0; };
|
||||
virtual int ConstEE(void) { return 0; }; // Disable assumptions unless overridden
|
||||
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const Field &in, Field &out)=0;
|
||||
virtual void Mooee (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInv (const Field &in, Field &out)=0;
|
||||
|
||||
virtual void MeooeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeDag (const Field &in, Field &out)=0;
|
||||
virtual void MooeeInvDag (const Field &in, Field &out)=0;
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
377
Grid/algorithms/approx/Chebyshev.h
Normal file
377
Grid/algorithms/approx/Chebyshev.h
Normal file
@ -0,0 +1,377 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Chebyshev.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CHEBYSHEV_H
|
||||
#define GRID_CHEBYSHEV_H
|
||||
|
||||
#include <Grid/algorithms/LinearOperator.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
struct ChebyParams : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams,
|
||||
RealD, alpha,
|
||||
RealD, beta,
|
||||
int, Npoly);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Chebyshev approximations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
class Chebyshev : public OperatorFunction<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
int order;
|
||||
RealD hi;
|
||||
RealD lo;
|
||||
|
||||
public:
|
||||
void csv(std::ostream &out){
|
||||
RealD diff = hi-lo;
|
||||
RealD delta = diff*1.0e-9;
|
||||
for (RealD x=lo; x<hi; x+=delta) {
|
||||
delta*=1.1;
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Convenience for plotting the approximation
|
||||
void PlotApprox(std::ostream &out) {
|
||||
out<<"Polynomial approx ["<<lo<<","<<hi<<"]"<<std::endl;
|
||||
for(RealD x=lo;x<hi;x+=(hi-lo)/50.0){
|
||||
out <<x<<"\t"<<approx(x)<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
Chebyshev(){};
|
||||
Chebyshev(ChebyParams p){ Init(p.alpha,p.beta,p.Npoly);};
|
||||
Chebyshev(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD) ) {Init(_lo,_hi,_order,func);};
|
||||
Chebyshev(RealD _lo,RealD _hi,int _order) {Init(_lo,_hi,_order);};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation".
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// CJ: the one we need for Lanczos
|
||||
void Init(RealD _lo,RealD _hi,int _order)
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
Coeffs.assign(0.,order);
|
||||
Coeffs[order-1] = 1.;
|
||||
};
|
||||
|
||||
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
|
||||
{
|
||||
lo=_lo;
|
||||
hi=_hi;
|
||||
order=_order;
|
||||
|
||||
if(order < 2) exit(-1);
|
||||
Coeffs.resize(order);
|
||||
for(int j=0;j<order;j++){
|
||||
RealD s=0;
|
||||
for(int k=0;k<order;k++){
|
||||
RealD y=std::cos(M_PI*(k+0.5)/order);
|
||||
RealD x=0.5*(y*(hi-lo)+(hi+lo));
|
||||
RealD f=func(x);
|
||||
s=s+f*std::cos( j*M_PI*(k+0.5)/order );
|
||||
}
|
||||
Coeffs[j] = s * 2.0/order;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void JacksonSmooth(void){
|
||||
RealD M=order;
|
||||
RealD alpha = M_PI/(M+2);
|
||||
RealD lmax = std::cos(alpha);
|
||||
RealD sumUsq =0;
|
||||
std::vector<RealD> U(M);
|
||||
std::vector<RealD> a(M);
|
||||
std::vector<RealD> g(M);
|
||||
for(int n=0;n<=M;n++){
|
||||
U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax));
|
||||
sumUsq += U[n]*U[n];
|
||||
}
|
||||
sumUsq = std::sqrt(sumUsq);
|
||||
|
||||
for(int i=1;i<=M;i++){
|
||||
a[i] = U[i]/sumUsq;
|
||||
}
|
||||
g[0] = 1.0;
|
||||
for(int m=1;m<=M;m++){
|
||||
g[m] = 0;
|
||||
for(int i=0;i<=M-m;i++){
|
||||
g[m]+= a[i]*a[m+i];
|
||||
}
|
||||
}
|
||||
for(int m=1;m<=M;m++){
|
||||
Coeffs[m]*=g[m];
|
||||
}
|
||||
}
|
||||
RealD approx(RealD x) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD T0=1;
|
||||
RealD T1=y;
|
||||
|
||||
RealD sum;
|
||||
sum = 0.5*Coeffs[0]*T0;
|
||||
sum+= Coeffs[1]*T1;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int i=2;i<order;i++){
|
||||
Tnp=2*y*Tn-Tnm;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
sum+= Tn*Coeffs[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
|
||||
RealD approxD(RealD x)
|
||||
{
|
||||
RealD Un;
|
||||
RealD Unm;
|
||||
RealD Unp;
|
||||
|
||||
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
|
||||
|
||||
RealD U0=1;
|
||||
RealD U1=2*y;
|
||||
|
||||
RealD sum;
|
||||
sum = Coeffs[1]*U0;
|
||||
sum+= Coeffs[2]*U1*2.0;
|
||||
|
||||
Un =U1;
|
||||
Unm=U0;
|
||||
for(int i=2;i<order-1;i++){
|
||||
Unp=2*y*Un-Unm;
|
||||
Unm=Un;
|
||||
Un =Unp;
|
||||
sum+= Un*Coeffs[i+1]*(i+1.0);
|
||||
}
|
||||
return sum/(0.5*(hi-lo));
|
||||
};
|
||||
|
||||
RealD approxInv(RealD z, RealD x0, int maxiter, RealD resid) {
|
||||
RealD x = x0;
|
||||
RealD eps;
|
||||
|
||||
int i;
|
||||
for (i=0;i<maxiter;i++) {
|
||||
eps = approx(x) - z;
|
||||
if (fabs(eps / z) < resid)
|
||||
return x;
|
||||
x = x - eps / approxD(x);
|
||||
}
|
||||
|
||||
return std::numeric_limits<double>::quiet_NaN();
|
||||
}
|
||||
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in.Grid();
|
||||
|
||||
// std::cout << "Chevyshef(): in.Grid()="<<in.Grid()<<std::endl;
|
||||
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// Tn=T1 = (xscale M + mscale)in
|
||||
RealD xscale = 2.0/(hi-lo);
|
||||
RealD mscale = -(hi+lo)/(hi-lo);
|
||||
Linop.HermOp(T0,y);
|
||||
T1=y*xscale+in*mscale;
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
Linop.HermOp(*Tn,y);
|
||||
|
||||
y=xscale*y+mscale*(*Tn);
|
||||
|
||||
*Tnp=2.0*y-(*Tnm);
|
||||
|
||||
out=out+Coeffs[n]* (*Tnp);
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Field>
|
||||
class ChebyshevLanczos : public Chebyshev<Field> {
|
||||
private:
|
||||
std::vector<RealD> Coeffs;
|
||||
int order;
|
||||
RealD alpha;
|
||||
RealD beta;
|
||||
RealD mu;
|
||||
|
||||
public:
|
||||
ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) :
|
||||
alpha(_alpha),
|
||||
beta(_beta),
|
||||
mu(_mu)
|
||||
{
|
||||
order=_order;
|
||||
Coeffs.resize(order);
|
||||
for(int i=0;i<_order;i++){
|
||||
Coeffs[i] = 0.0;
|
||||
}
|
||||
Coeffs[order-1]=1.0;
|
||||
};
|
||||
|
||||
void csv(std::ostream &out){
|
||||
for (RealD x=-1.2*alpha; x<1.2*alpha; x+=(2.0*alpha)/10000) {
|
||||
RealD f = approx(x);
|
||||
out<< x<<" "<<f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
RealD approx(RealD xx) // Convenience for plotting the approximation
|
||||
{
|
||||
RealD Tn;
|
||||
RealD Tnm;
|
||||
RealD Tnp;
|
||||
Real aa = alpha * alpha;
|
||||
Real bb = beta * beta;
|
||||
|
||||
RealD x = ( 2.0 * (xx-mu)*(xx-mu) - (aa+bb) ) / (aa-bb);
|
||||
|
||||
RealD y= x;
|
||||
|
||||
RealD T0=1;
|
||||
RealD T1=y;
|
||||
|
||||
RealD sum;
|
||||
sum = 0.5*Coeffs[0]*T0;
|
||||
sum+= Coeffs[1]*T1;
|
||||
|
||||
Tn =T1;
|
||||
Tnm=T0;
|
||||
for(int i=2;i<order;i++){
|
||||
Tnp=2*y*Tn-Tnm;
|
||||
Tnm=Tn;
|
||||
Tn =Tnp;
|
||||
sum+= Tn*Coeffs[i];
|
||||
}
|
||||
return sum;
|
||||
};
|
||||
|
||||
// shift_Multiply in Rudy's code
|
||||
void AminusMuSq(LinearOperatorBase<Field> &Linop, const Field &in, Field &out)
|
||||
{
|
||||
GridBase *grid=in.Grid();
|
||||
Field tmp(grid);
|
||||
|
||||
RealD aa= alpha*alpha;
|
||||
RealD bb= beta * beta;
|
||||
|
||||
Linop.HermOp(in,out);
|
||||
out = out - mu*in;
|
||||
|
||||
Linop.HermOp(out,tmp);
|
||||
tmp = tmp - mu * out;
|
||||
|
||||
out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in;
|
||||
};
|
||||
// Implement the required interface
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
|
||||
|
||||
GridBase *grid=in.Grid();
|
||||
|
||||
int vol=grid->gSites();
|
||||
|
||||
Field T0(grid); T0 = in;
|
||||
Field T1(grid);
|
||||
Field T2(grid);
|
||||
Field y(grid);
|
||||
|
||||
Field *Tnm = &T0;
|
||||
Field *Tn = &T1;
|
||||
Field *Tnp = &T2;
|
||||
|
||||
// Tn=T1 = (xscale M )*in
|
||||
AminusMuSq(Linop,T0,T1);
|
||||
|
||||
// sum = .5 c[0] T0 + c[1] T1
|
||||
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
|
||||
for(int n=2;n<order;n++){
|
||||
|
||||
AminusMuSq(Linop,*Tn,y);
|
||||
|
||||
*Tnp=2.0*y-(*Tnm);
|
||||
|
||||
out=out+Coeffs[n]* (*Tnp);
|
||||
|
||||
// Cycle pointers to avoid copies
|
||||
Field *swizzle = Tnm;
|
||||
Tnm =Tn;
|
||||
Tn =Tnp;
|
||||
Tnp =swizzle;
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
152
Grid/algorithms/approx/Forecast.h
Normal file
152
Grid/algorithms/approx/Forecast.h
Normal file
@ -0,0 +1,152 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/Forecast.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: David Murphy <dmurphy@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#ifndef INCLUDED_FORECAST_H
|
||||
#define INCLUDED_FORECAST_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Abstract base class.
|
||||
// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi)
|
||||
// and returns a forecasted solution to the system D*psi = phi (psi).
|
||||
template<class Matrix, class Field>
|
||||
class Forecast
|
||||
{
|
||||
public:
|
||||
virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& chi) = 0;
|
||||
};
|
||||
|
||||
// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012),
|
||||
// used to forecast solutions across poles of the EOFA heatbath.
|
||||
//
|
||||
// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C)
|
||||
template<class Matrix, class Field>
|
||||
class ChronoForecast : public Forecast<Matrix,Field>
|
||||
{
|
||||
public:
|
||||
Field operator()(Matrix &Mat, const Field& phi, const std::vector<Field>& prev_solns)
|
||||
{
|
||||
int degree = prev_solns.size();
|
||||
Field chi(phi); // forecasted solution
|
||||
|
||||
// Trivial cases
|
||||
if(degree == 0){ chi = Zero(); return chi; }
|
||||
else if(degree == 1){ return prev_solns[0]; }
|
||||
|
||||
// RealD dot;
|
||||
ComplexD xp;
|
||||
Field r(phi); // residual
|
||||
Field Mv(phi);
|
||||
std::vector<Field> v(prev_solns); // orthonormalized previous solutions
|
||||
std::vector<Field> MdagMv(degree,phi);
|
||||
|
||||
// Array to hold the matrix elements
|
||||
std::vector<std::vector<ComplexD>> G(degree, std::vector<ComplexD>(degree));
|
||||
|
||||
// Solution and source vectors
|
||||
std::vector<ComplexD> a(degree);
|
||||
std::vector<ComplexD> b(degree);
|
||||
|
||||
// Orthonormalize the vector basis
|
||||
for(int i=0; i<degree; i++){
|
||||
v[i] *= 1.0/std::sqrt(norm2(v[i]));
|
||||
for(int j=i+1; j<degree; j++){ v[j] -= innerProduct(v[i],v[j]) * v[i]; }
|
||||
}
|
||||
|
||||
// Perform sparse matrix multiplication and construct rhs
|
||||
for(int i=0; i<degree; i++){
|
||||
b[i] = innerProduct(v[i],phi);
|
||||
Mat.M(v[i],Mv);
|
||||
Mat.Mdag(Mv,MdagMv[i]);
|
||||
G[i][i] = innerProduct(v[i],MdagMv[i]);
|
||||
}
|
||||
|
||||
// Construct the matrix
|
||||
for(int j=0; j<degree; j++){
|
||||
for(int k=j+1; k<degree; k++){
|
||||
G[j][k] = innerProduct(v[j],MdagMv[k]);
|
||||
G[k][j] = conjugate(G[j][k]);
|
||||
}}
|
||||
|
||||
// Gauss-Jordan elimination with partial pivoting
|
||||
for(int i=0; i<degree; i++){
|
||||
|
||||
// Perform partial pivoting
|
||||
int k = i;
|
||||
for(int j=i+1; j<degree; j++){ if(abs(G[j][j]) > abs(G[k][k])){ k = j; } }
|
||||
if(k != i){
|
||||
xp = b[k];
|
||||
b[k] = b[i];
|
||||
b[i] = xp;
|
||||
for(int j=0; j<degree; j++){
|
||||
xp = G[k][j];
|
||||
G[k][j] = G[i][j];
|
||||
G[i][j] = xp;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert matrix to upper triangular form
|
||||
for(int j=i+1; j<degree; j++){
|
||||
xp = G[j][i]/G[i][i];
|
||||
b[j] -= xp * b[i];
|
||||
for(int k=0; k<degree; k++){ G[j][k] -= xp*G[i][k]; }
|
||||
}
|
||||
}
|
||||
|
||||
// Use Gaussian elimination to solve equations and calculate initial guess
|
||||
chi = Zero();
|
||||
r = phi;
|
||||
for(int i=degree-1; i>=0; i--){
|
||||
a[i] = 0.0;
|
||||
for(int j=i+1; j<degree; j++){ a[i] += G[i][j] * a[j]; }
|
||||
a[i] = (b[i]-a[i])/G[i][i];
|
||||
chi += a[i]*v[i];
|
||||
r -= a[i]*MdagMv[i];
|
||||
}
|
||||
|
||||
RealD true_r(0.0);
|
||||
ComplexD tmp;
|
||||
for(int i=0; i<degree; i++){
|
||||
tmp = -b[i];
|
||||
for(int j=0; j<degree; j++){ tmp += G[i][j]*a[j]; }
|
||||
tmp = conjugate(tmp)*tmp;
|
||||
true_r += std::sqrt(tmp.real());
|
||||
}
|
||||
|
||||
RealD error = std::sqrt(norm2(r)/norm2(phi));
|
||||
std::cout << GridLogMessage << "ChronoForecast: |res|/|src| = " << error << std::endl;
|
||||
|
||||
return chi;
|
||||
};
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
21
Grid/algorithms/approx/LICENSE
Normal file
21
Grid/algorithms/approx/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
|
||||
Copyright (c) 2011 Michael Clark
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
57
Grid/algorithms/approx/MultiShiftFunction.cc
Normal file
57
Grid/algorithms/approx/MultiShiftFunction.cc
Normal file
@ -0,0 +1,57 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/MultiShiftFunction.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
double MultiShiftFunction::approx(double x)
|
||||
{
|
||||
double a = norm;
|
||||
for(int n=0;n<poles.size();n++){
|
||||
a = a + residues[n]/(x+poles[n]);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
void MultiShiftFunction::gnuplot(std::ostream &out)
|
||||
{
|
||||
out<<"f(x) = "<<norm<<"";
|
||||
for(int n=0;n<poles.size();n++){
|
||||
out<<"+("<<residues[n]<<"/(x+"<<poles[n]<<"))";
|
||||
}
|
||||
out<<";"<<std::endl;
|
||||
}
|
||||
void MultiShiftFunction::csv(std::ostream &out)
|
||||
{
|
||||
for (double x=lo; x<hi; x*=1.05) {
|
||||
double f = approx(x);
|
||||
double r = sqrt(x);
|
||||
out<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
NAMESPACE_END(Grid);
|
67
Grid/algorithms/approx/MultiShiftFunction.h
Normal file
67
Grid/algorithms/approx/MultiShiftFunction.h
Normal file
@ -0,0 +1,67 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/MultiShiftFunction.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef MULTI_SHIFT_FUNCTION
|
||||
#define MULTI_SHIFT_FUNCTION
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
class MultiShiftFunction {
|
||||
public:
|
||||
int order;
|
||||
std::vector<RealD> poles;
|
||||
std::vector<RealD> residues;
|
||||
std::vector<RealD> tolerances;
|
||||
RealD norm;
|
||||
RealD lo,hi;
|
||||
|
||||
MultiShiftFunction(int n,RealD _lo,RealD _hi): poles(n), residues(n), lo(_lo), hi(_hi) {;};
|
||||
RealD approx(RealD x);
|
||||
void csv(std::ostream &out);
|
||||
void gnuplot(std::ostream &out);
|
||||
|
||||
void Init(AlgRemez & remez,double tol,bool inverse)
|
||||
{
|
||||
order=remez.getDegree();
|
||||
tolerances.resize(remez.getDegree(),tol);
|
||||
poles.resize(remez.getDegree());
|
||||
residues.resize(remez.getDegree());
|
||||
remez.getBounds(lo,hi);
|
||||
if ( inverse ) remez.getIPFE (&residues[0],&poles[0],&norm);
|
||||
else remez.getPFE (&residues[0],&poles[0],&norm);
|
||||
}
|
||||
// Allow deferred initialisation
|
||||
MultiShiftFunction(void){};
|
||||
MultiShiftFunction(AlgRemez & remez,double tol,bool inverse)
|
||||
{
|
||||
Init(remez,tol,inverse);
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
80
Grid/algorithms/approx/README
Normal file
80
Grid/algorithms/approx/README
Normal file
@ -0,0 +1,80 @@
|
||||
-----------------------------------------------------------------------------------
|
||||
|
||||
PAB. Took Mike Clark's AlgRemez from GitHub and (modified a little) include.
|
||||
This is open source and license and readme and comments are preserved consistent
|
||||
with the license. Mike, thankyou!
|
||||
-----------------------------------------------------------------------------------
|
||||
-----------------------------------------------------------------------------------
|
||||
AlgRemez
|
||||
|
||||
The archive downloadable here contains an implementation of the Remez
|
||||
algorithm which calculates optimal rational (and polynomial)
|
||||
approximations to the nth root over a given spectral range. The Remez
|
||||
algorithm, although in principle is extremely straightforward to
|
||||
program, is quite difficult to get completely correct, e.g., the Maple
|
||||
implementation of the algorithm does not always converge to the
|
||||
correct answer.
|
||||
|
||||
To use this algorithm you need to install GMP, the GNU Multiple
|
||||
Precision Library, and when configuring the install, you must include
|
||||
the --enable-mpfr option (see the GMP manual for more details). You
|
||||
also have to edit the Makefile for AlgRemez appropriately for your
|
||||
system, namely to point corrrectly to the location of the GMP library.
|
||||
|
||||
The simple main program included with this archive invokes the
|
||||
AlgRemez class to calculate an approximation given by command line
|
||||
arguments. It is invoked by the following
|
||||
|
||||
./test y z n d lambda_low lambda_high precision,
|
||||
|
||||
where the function to be approximated is f(x) = x^(y/z), with degree
|
||||
(n,d) over the spectral range [lambda_low, lambda_high], using
|
||||
precision digits of precision in the arithmetic. So an example would
|
||||
be
|
||||
|
||||
./test 1 2 5 5 0.0004 64 40
|
||||
|
||||
which corresponds to constructing a rational approximation to the
|
||||
square root function, with degree (5,5) over the range [0.0004,64]
|
||||
with 40 digits of precision used for the arithmetic. The parameters y
|
||||
and z must be positive, the approximation to f(x) = x^(-y/z) is simply
|
||||
the inverse of the approximation to f(x) = x^(y/z). After the
|
||||
approximation has been constructed, the roots and poles of the
|
||||
rational function are found, and then the partial fraction expansion
|
||||
of both the rational function and it's inverse are found, the results
|
||||
of which are output to a file called "approx.dat". In addition, the
|
||||
error function of the approximation is output to "error.dat", where it
|
||||
can be checked that the resultant approximation satisfies Chebychev's
|
||||
criterion, namely all error maxima are equal in magnitude, and
|
||||
adjacent maxima are oppostie in sign. There are some caveats here
|
||||
however, the optimal polynomial approximation has complex roots, and
|
||||
the root finding implemented here cannot (yet) handle complex roots.
|
||||
In addition, the partial fraction expansion of rational approximations
|
||||
is only found for the case n = d, i.e., the degree of numerator
|
||||
polynomial equals that of the denominator polynomial. The convention
|
||||
for the partial fraction expansion is that polar shifts are always
|
||||
written added to x, not subtracted.
|
||||
|
||||
To do list
|
||||
|
||||
1. Include an exponential dampening factor in the function to be
|
||||
approximated. This may sound trivial to implement, but for some
|
||||
parameters, the algorithm seems to breakdown. Also, the roots in the
|
||||
rational approximation sometimes become complex, which currently
|
||||
breaks the stupidly simple root finding code.
|
||||
|
||||
2. Make the algorithm faster - it's too slow when running on qcdoc.
|
||||
|
||||
3. Add complex root finding.
|
||||
|
||||
4. Add more options for error minimisation - currently the code
|
||||
minimises the relative error, should add options for absolute error,
|
||||
and other norms.
|
||||
|
||||
There will be a forthcoming publication concerning the results
|
||||
generated by this software, but in the meantime, if you use this
|
||||
software, please cite it as
|
||||
"M.A. Clark and A.D. Kennedy, https://github.com/mikeaclark/AlgRemez, 2005".
|
||||
|
||||
If you have any problems using the software, then please email scientist.mike@gmail.com.
|
||||
|
759
Grid/algorithms/approx/Remez.cc
Normal file
759
Grid/algorithms/approx/Remez.cc
Normal file
@ -0,0 +1,759 @@
|
||||
/*
|
||||
|
||||
Mike Clark - 25th May 2005
|
||||
|
||||
alg_remez.C
|
||||
|
||||
AlgRemez is an implementation of the Remez algorithm, which in this
|
||||
case is used for generating the optimal nth root rational
|
||||
approximation.
|
||||
|
||||
Note this class requires the gnu multiprecision (GNU MP) library.
|
||||
|
||||
*/
|
||||
|
||||
#include<math.h>
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string>
|
||||
#include<iostream>
|
||||
#include<iomanip>
|
||||
#include<cassert>
|
||||
|
||||
#include<Grid/algorithms/approx/Remez.h>
|
||||
|
||||
// Constructor
|
||||
AlgRemez::AlgRemez(double lower, double upper, long precision)
|
||||
{
|
||||
prec = precision;
|
||||
bigfloat::setDefaultPrecision(prec);
|
||||
|
||||
apstrt = lower;
|
||||
apend = upper;
|
||||
apwidt = apend - apstrt;
|
||||
|
||||
std::cout<<"Approximation bounds are ["<<apstrt<<","<<apend<<"]\n";
|
||||
std::cout<<"Precision of arithmetic is "<<precision<<std::endl;
|
||||
|
||||
alloc = 0;
|
||||
n = 0;
|
||||
d = 0;
|
||||
|
||||
foundRoots = 0;
|
||||
|
||||
// Only require the approximation spread to be less than 1 ulp
|
||||
tolerance = 1e-15;
|
||||
}
|
||||
|
||||
// Destructor
|
||||
AlgRemez::~AlgRemez()
|
||||
{
|
||||
if (alloc) {
|
||||
delete [] param;
|
||||
delete [] roots;
|
||||
delete [] poles;
|
||||
delete [] xx;
|
||||
delete [] mm;
|
||||
delete [] a_power;
|
||||
delete [] a;
|
||||
}
|
||||
}
|
||||
|
||||
// Free memory and reallocate as necessary
|
||||
void AlgRemez::allocate(int num_degree, int den_degree)
|
||||
{
|
||||
// Arrays have previously been allocated, deallocate first, then allocate
|
||||
if (alloc) {
|
||||
delete [] param;
|
||||
delete [] roots;
|
||||
delete [] poles;
|
||||
delete [] xx;
|
||||
delete [] mm;
|
||||
}
|
||||
|
||||
// Note use of new and delete in memory allocation - cannot run on qcdsp
|
||||
param = new bigfloat[num_degree+den_degree+1];
|
||||
roots = new bigfloat[num_degree];
|
||||
poles = new bigfloat[den_degree];
|
||||
xx = new bigfloat[num_degree+den_degree+3];
|
||||
mm = new bigfloat[num_degree+den_degree+2];
|
||||
|
||||
if (!alloc) {
|
||||
// The coefficients of the sum in the exponential
|
||||
a = new bigfloat[SUM_MAX];
|
||||
a_power = new int[SUM_MAX];
|
||||
}
|
||||
|
||||
alloc = 1;
|
||||
}
|
||||
|
||||
// Reset the bounds of the approximation
|
||||
void AlgRemez::setBounds(double lower, double upper)
|
||||
{
|
||||
apstrt = lower;
|
||||
apend = upper;
|
||||
apwidt = apend - apstrt;
|
||||
}
|
||||
|
||||
// Generate the rational approximation x^(pnum/pden)
|
||||
double AlgRemez::generateApprox(int degree, unsigned long pnum,
|
||||
unsigned long pden)
|
||||
{
|
||||
return generateApprox(degree, degree, pnum, pden);
|
||||
}
|
||||
|
||||
double AlgRemez::generateApprox(int num_degree, int den_degree,
|
||||
unsigned long pnum, unsigned long pden)
|
||||
{
|
||||
double *a_param = 0;
|
||||
int *a_pow = 0;
|
||||
return generateApprox(num_degree, den_degree, pnum, pden, 0, a_param, a_pow);
|
||||
}
|
||||
|
||||
// Generate the rational approximation x^(pnum/pden)
|
||||
double AlgRemez::generateApprox(int num_degree, int den_degree,
|
||||
unsigned long pnum, unsigned long pden,
|
||||
int a_len, double *a_param, int *a_pow)
|
||||
{
|
||||
std::cout<<"Degree of the approximation is ("<<num_degree<<","<<den_degree<<")\n";
|
||||
std::cout<<"Approximating the function x^("<<pnum<<"/"<<pden<<")\n";
|
||||
|
||||
// Reallocate arrays, since degree has changed
|
||||
if (num_degree != n || den_degree != d) allocate(num_degree,den_degree);
|
||||
|
||||
assert(a_len<=SUM_MAX);
|
||||
|
||||
step = new bigfloat[num_degree+den_degree+2];
|
||||
|
||||
a_length = a_len;
|
||||
for (int j=0; j<a_len; j++) {
|
||||
a[j]= a_param[j];
|
||||
a_power[j] = a_pow[j];
|
||||
}
|
||||
|
||||
power_num = pnum;
|
||||
power_den = pden;
|
||||
spread = 1.0e37;
|
||||
iter = 0;
|
||||
|
||||
n = num_degree;
|
||||
d = den_degree;
|
||||
neq = n + d + 1;
|
||||
|
||||
initialGuess();
|
||||
stpini(step);
|
||||
|
||||
while (spread > tolerance) { //iterate until convergance
|
||||
|
||||
if (iter++%100==0)
|
||||
std::cout<<"Iteration " <<iter-1<<" spread "<<(double)spread<<" delta "<<(double)delta<<std::endl;
|
||||
|
||||
equations();
|
||||
if (delta < tolerance) {
|
||||
std::cout<<"Delta too small, try increasing precision\n";
|
||||
assert(0);
|
||||
};
|
||||
assert( delta>= tolerance);
|
||||
|
||||
search(step);
|
||||
}
|
||||
|
||||
int sign;
|
||||
double error = (double)getErr(mm[0],&sign);
|
||||
std::cout<<"Converged at "<<iter<<" iterations; error = "<<error<<std::endl;
|
||||
|
||||
// Once the approximation has been generated, calculate the roots
|
||||
if(!root()) {
|
||||
std::cout<<"Root finding failed\n";
|
||||
} else {
|
||||
foundRoots = 1;
|
||||
}
|
||||
|
||||
delete [] step;
|
||||
|
||||
// Return the maximum error in the approximation
|
||||
return error;
|
||||
}
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(pnum/pden)
|
||||
int AlgRemez::getPFE(double *Res, double *Pole, double *Norm) {
|
||||
|
||||
if (n!=d) {
|
||||
std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!alloc) {
|
||||
std::cout<<"Approximation not yet generated\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!foundRoots) {
|
||||
std::cout<<"Roots not found, so PFE cannot be taken\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
bigfloat *r = new bigfloat[n];
|
||||
bigfloat *p = new bigfloat[d];
|
||||
|
||||
for (int i=0; i<n; i++) r[i] = roots[i];
|
||||
for (int i=0; i<d; i++) p[i] = poles[i];
|
||||
|
||||
// Perform a partial fraction expansion
|
||||
pfe(r, p, norm);
|
||||
|
||||
// Convert to double and return
|
||||
*Norm = (double)norm;
|
||||
for (int i=0; i<n; i++) Res[i] = (double)r[i];
|
||||
for (int i=0; i<d; i++) Pole[i] = (double)p[i];
|
||||
|
||||
delete [] r;
|
||||
delete [] p;
|
||||
|
||||
// Where the smallest shift is located
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(-pnum/pden)
|
||||
int AlgRemez::getIPFE(double *Res, double *Pole, double *Norm) {
|
||||
|
||||
if (n!=d) {
|
||||
std::cout<<"Cannot handle case: Numerator degree neq Denominator degree\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!alloc) {
|
||||
std::cout<<"Approximation not yet generated\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!foundRoots) {
|
||||
std::cout<<"Roots not found, so PFE cannot be taken\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
bigfloat *r = new bigfloat[d];
|
||||
bigfloat *p = new bigfloat[n];
|
||||
|
||||
// Want the inverse function
|
||||
for (int i=0; i<n; i++) {
|
||||
r[i] = poles[i];
|
||||
p[i] = roots[i];
|
||||
}
|
||||
|
||||
// Perform a partial fraction expansion
|
||||
pfe(r, p, (bigfloat)1l/norm);
|
||||
|
||||
// Convert to double and return
|
||||
*Norm = (double)((bigfloat)1l/(norm));
|
||||
for (int i=0; i<n; i++) {
|
||||
Res[i] = (double)r[i];
|
||||
Pole[i] = (double)p[i];
|
||||
}
|
||||
|
||||
delete [] r;
|
||||
delete [] p;
|
||||
|
||||
// Where the smallest shift is located
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initial values of maximal and minimal errors
|
||||
void AlgRemez::initialGuess() {
|
||||
|
||||
// Supply initial guesses for solution points
|
||||
long ncheb = neq; // Degree of Chebyshev error estimate
|
||||
bigfloat a, r;
|
||||
|
||||
// Find ncheb+1 extrema of Chebyshev polynomial
|
||||
|
||||
a = ncheb;
|
||||
mm[0] = apstrt;
|
||||
for (long i = 1; i < ncheb; i++) {
|
||||
r = 0.5 * (1 - cos((M_PI * i)/(double) a));
|
||||
//r *= sqrt_bf(r);
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
mm[i] = apstrt + r * apwidt;
|
||||
}
|
||||
mm[ncheb] = apend;
|
||||
|
||||
a = 2.0 * ncheb;
|
||||
for (long i = 0; i <= ncheb; i++) {
|
||||
r = 0.5 * (1 - cos(M_PI * (2*i+1)/(double) a));
|
||||
//r *= sqrt_bf(r); // Squeeze to low end of interval
|
||||
r = (exp((double)r)-1.0)/(exp(1.0)-1.0);
|
||||
xx[i] = apstrt + r * apwidt;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise step sizes
|
||||
void AlgRemez::stpini(bigfloat *step) {
|
||||
xx[neq+1] = apend;
|
||||
delta = 0.25;
|
||||
step[0] = xx[0] - apstrt;
|
||||
for (int i = 1; i < neq; i++) step[i] = xx[i] - xx[i-1];
|
||||
step[neq] = step[neq-1];
|
||||
}
|
||||
|
||||
// Search for error maxima and minima
|
||||
void AlgRemez::search(bigfloat *step) {
|
||||
bigfloat a, q, xm, ym, xn, yn, xx0, xx1;
|
||||
int i, meq, emsign, ensign, steps;
|
||||
|
||||
meq = neq + 1;
|
||||
bigfloat *yy = new bigfloat[meq];
|
||||
|
||||
bigfloat eclose = 1.0e30;
|
||||
bigfloat farther = 0l;
|
||||
|
||||
xx0 = apstrt;
|
||||
|
||||
for (i = 0; i < meq; i++) {
|
||||
steps = 0;
|
||||
xx1 = xx[i]; // Next zero
|
||||
if (i == meq-1) xx1 = apend;
|
||||
xm = mm[i];
|
||||
ym = getErr(xm,&emsign);
|
||||
q = step[i];
|
||||
xn = xm + q;
|
||||
if (xn < xx0 || xn >= xx1) { // Cannot skip over adjacent boundaries
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
} else {
|
||||
yn = getErr(xn,&ensign);
|
||||
if (yn < ym) {
|
||||
q = -q;
|
||||
xn = xm;
|
||||
yn = ym;
|
||||
ensign = emsign;
|
||||
}
|
||||
}
|
||||
|
||||
while(yn >= ym) { // March until error becomes smaller.
|
||||
if (++steps > 10) break;
|
||||
ym = yn;
|
||||
xm = xn;
|
||||
emsign = ensign;
|
||||
a = xm + q;
|
||||
if (a == xm || a <= xx0 || a >= xx1) break;// Must not skip over the zeros either side.
|
||||
xn = a;
|
||||
yn = getErr(xn,&ensign);
|
||||
}
|
||||
|
||||
mm[i] = xm; // Position of maximum
|
||||
yy[i] = ym; // Value of maximum
|
||||
|
||||
if (eclose > ym) eclose = ym;
|
||||
if (farther < ym) farther = ym;
|
||||
|
||||
xx0 = xx1; // Walk to next zero.
|
||||
} // end of search loop
|
||||
|
||||
q = (farther - eclose); // Decrease step size if error spread increased
|
||||
if (eclose != 0.0) q /= eclose; // Relative error spread
|
||||
if (q >= spread) delta *= 0.5; // Spread is increasing; decrease step size
|
||||
spread = q;
|
||||
|
||||
for (i = 0; i < neq; i++) {
|
||||
q = yy[i+1];
|
||||
if (q != 0.0) q = yy[i] / q - (bigfloat)1l;
|
||||
else q = 0.0625;
|
||||
if (q > (bigfloat)0.25) q = 0.25;
|
||||
q *= mm[i+1] - mm[i];
|
||||
step[i] = q * delta;
|
||||
}
|
||||
step[neq] = step[neq-1];
|
||||
|
||||
for (i = 0; i < neq; i++) { // Insert new locations for the zeros.
|
||||
xm = xx[i] - step[i];
|
||||
if (xm <= apstrt) continue;
|
||||
if (xm >= apend) continue;
|
||||
if (xm <= mm[i]) xm = (bigfloat)0.5 * (mm[i] + xx[i]);
|
||||
if (xm >= mm[i+1]) xm = (bigfloat)0.5 * (mm[i+1] + xx[i]);
|
||||
xx[i] = xm;
|
||||
}
|
||||
|
||||
delete [] yy;
|
||||
}
|
||||
|
||||
// Solve the equations
|
||||
void AlgRemez::equations(void) {
|
||||
bigfloat x, y, z;
|
||||
int i, j, ip;
|
||||
bigfloat *aa;
|
||||
|
||||
bigfloat *AA = new bigfloat[(neq)*(neq)];
|
||||
bigfloat *BB = new bigfloat[neq];
|
||||
|
||||
for (i = 0; i < neq; i++) { // set up the equations for solution by simq()
|
||||
ip = neq * i; // offset to 1st element of this row of matrix
|
||||
x = xx[i]; // the guess for this row
|
||||
y = func(x); // right-hand-side vector
|
||||
|
||||
z = (bigfloat)1l;
|
||||
aa = AA+ip;
|
||||
for (j = 0; j <= n; j++) {
|
||||
*aa++ = z;
|
||||
z *= x;
|
||||
}
|
||||
|
||||
z = (bigfloat)1l;
|
||||
for (j = 0; j < d; j++) {
|
||||
*aa++ = -y * z;
|
||||
z *= x;
|
||||
}
|
||||
BB[i] = y * z; // Right hand side vector
|
||||
}
|
||||
|
||||
// Solve the simultaneous linear equations.
|
||||
if (simq(AA, BB, param, neq)) {
|
||||
std::cout<<"simq failed\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
delete [] AA;
|
||||
delete [] BB;
|
||||
|
||||
}
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients
|
||||
// from the solution vector param
|
||||
bigfloat AlgRemez::approx(const bigfloat x) {
|
||||
bigfloat yn, yd;
|
||||
int i;
|
||||
|
||||
// Work backwards toward the constant term.
|
||||
yn = param[n]; // Highest order numerator coefficient
|
||||
for (i = n-1; i >= 0; i--) yn = x * yn + param[i];
|
||||
yd = x + param[n+d]; // Highest degree coefficient = 1.0
|
||||
for (i = n+d-1; i > n; i--) yd = x * yd + param[i];
|
||||
|
||||
return(yn/yd);
|
||||
}
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat AlgRemez::getErr(bigfloat x, int *sign) {
|
||||
bigfloat e, f;
|
||||
|
||||
f = func(x);
|
||||
e = approx(x) - f;
|
||||
if (f != 0) e /= f;
|
||||
if (e < (bigfloat)0.0) {
|
||||
*sign = -1;
|
||||
e = -e;
|
||||
}
|
||||
else *sign = 1;
|
||||
|
||||
return(e);
|
||||
}
|
||||
|
||||
// Calculate function required for the approximation.
|
||||
bigfloat AlgRemez::func(const bigfloat x) {
|
||||
|
||||
bigfloat z = (bigfloat)power_num / (bigfloat)power_den;
|
||||
bigfloat y;
|
||||
|
||||
if (x == (bigfloat)1.0) y = (bigfloat)1.0;
|
||||
else y = pow_bf(x,z);
|
||||
|
||||
if (a_length > 0) {
|
||||
bigfloat sum = 0l;
|
||||
for (int j=0; j<a_length; j++) sum += a[j]*pow_bf(x,a_power[j]);
|
||||
return y * exp_bf(sum);
|
||||
} else {
|
||||
return y;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Solve the system AX=B
|
||||
int AlgRemez::simq(bigfloat A[], bigfloat B[], bigfloat X[], int n) {
|
||||
|
||||
int i, j, ij, ip, ipj, ipk, ipn;
|
||||
int idxpiv, iback;
|
||||
int k, kp, kp1, kpk, kpn;
|
||||
int nip, nkp, nm1;
|
||||
bigfloat em, q, rownrm, big, size, pivot, sum;
|
||||
bigfloat *aa;
|
||||
|
||||
// simq() work vector
|
||||
int *IPS = new int[(neq) * sizeof(int)];
|
||||
|
||||
nm1 = n - 1;
|
||||
// Initialize IPS and X
|
||||
|
||||
ij = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
IPS[i] = i;
|
||||
rownrm = 0.0;
|
||||
for(j = 0; j < n; j++) {
|
||||
q = abs_bf(A[ij]);
|
||||
if(rownrm < q) rownrm = q;
|
||||
++ij;
|
||||
}
|
||||
if (rownrm == (bigfloat)0l) {
|
||||
std::cout<<"simq rownrm=0\n";
|
||||
delete [] IPS;
|
||||
return(1);
|
||||
}
|
||||
X[i] = (bigfloat)1.0 / rownrm;
|
||||
}
|
||||
|
||||
for (k = 0; k < nm1; k++) {
|
||||
big = 0.0;
|
||||
idxpiv = 0;
|
||||
|
||||
for (i = k; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
size = abs_bf(A[ipk]) * X[ip];
|
||||
if (size > big) {
|
||||
big = size;
|
||||
idxpiv = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (big == (bigfloat)0l) {
|
||||
std::cout<<"simq big=0\n";
|
||||
delete [] IPS;
|
||||
return(2);
|
||||
}
|
||||
if (idxpiv != k) {
|
||||
j = IPS[k];
|
||||
IPS[k] = IPS[idxpiv];
|
||||
IPS[idxpiv] = j;
|
||||
}
|
||||
kp = IPS[k];
|
||||
kpk = n*kp + k;
|
||||
pivot = A[kpk];
|
||||
kp1 = k+1;
|
||||
for (i = kp1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipk = n*ip + k;
|
||||
em = -A[ipk] / pivot;
|
||||
A[ipk] = -em;
|
||||
nip = n*ip;
|
||||
nkp = n*kp;
|
||||
aa = A+nkp+kp1;
|
||||
for (j = kp1; j < n; j++) {
|
||||
ipj = nip + j;
|
||||
A[ipj] = A[ipj] + em * *aa++;
|
||||
}
|
||||
}
|
||||
}
|
||||
kpn = n * IPS[n-1] + n - 1; // last element of IPS[n] th row
|
||||
if (A[kpn] == (bigfloat)0l) {
|
||||
std::cout<<"simq A[kpn]=0\n";
|
||||
delete [] IPS;
|
||||
return(3);
|
||||
}
|
||||
|
||||
|
||||
ip = IPS[0];
|
||||
X[0] = B[ip];
|
||||
for (i = 1; i < n; i++) {
|
||||
ip = IPS[i];
|
||||
ipj = n * ip;
|
||||
sum = 0.0;
|
||||
for (j = 0; j < i; j++) {
|
||||
sum += A[ipj] * X[j];
|
||||
++ipj;
|
||||
}
|
||||
X[i] = B[ip] - sum;
|
||||
}
|
||||
|
||||
ipn = n * IPS[n-1] + n - 1;
|
||||
X[n-1] = X[n-1] / A[ipn];
|
||||
|
||||
for (iback = 1; iback < n; iback++) {
|
||||
//i goes (n-1),...,1
|
||||
i = nm1 - iback;
|
||||
ip = IPS[i];
|
||||
nip = n*ip;
|
||||
sum = 0.0;
|
||||
aa = A+nip+i+1;
|
||||
for (j= i + 1; j < n; j++)
|
||||
sum += *aa++ * X[j];
|
||||
X[i] = (X[i] - sum) / A[nip+i];
|
||||
}
|
||||
|
||||
delete [] IPS;
|
||||
return(0);
|
||||
}
|
||||
|
||||
// Calculate the roots of the approximation
|
||||
int AlgRemez::root() {
|
||||
|
||||
long i,j;
|
||||
bigfloat x,dx=0.05;
|
||||
bigfloat upper=1, lower=-100000;
|
||||
bigfloat tol = 1e-20;
|
||||
|
||||
bigfloat *poly = new bigfloat[neq+1];
|
||||
|
||||
// First find the numerator roots
|
||||
for (i=0; i<=n; i++) poly[i] = param[i];
|
||||
|
||||
for (i=n-1; i>=0; i--) {
|
||||
roots[i] = rtnewt(poly,i+1,lower,upper,tol);
|
||||
if (roots[i] == 0.0) {
|
||||
std::cout<<"Failure to converge on root "<<i+1<<"/"<<n<<"\n";
|
||||
return 0;
|
||||
}
|
||||
poly[0] = -poly[0]/roots[i];
|
||||
for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/roots[i];
|
||||
}
|
||||
|
||||
// Now find the denominator roots
|
||||
poly[d] = 1l;
|
||||
for (i=0; i<d; i++) poly[i] = param[n+1+i];
|
||||
|
||||
for (i=d-1; i>=0; i--) {
|
||||
poles[i]=rtnewt(poly,i+1,lower,upper,tol);
|
||||
if (poles[i] == 0.0) {
|
||||
std::cout<<"Failure to converge on pole "<<i+1<<"/"<<d<<"\n";
|
||||
return 0;
|
||||
}
|
||||
poly[0] = -poly[0]/poles[i];
|
||||
for (j=1; j<=i; j++) poly[j] = (poly[j-1] - poly[j])/poles[i];
|
||||
}
|
||||
|
||||
norm = param[n];
|
||||
|
||||
delete [] poly;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Evaluate the polynomial
|
||||
bigfloat AlgRemez::polyEval(bigfloat x, bigfloat *poly, long size) {
|
||||
bigfloat f = poly[size];
|
||||
for (int i=size-1; i>=0; i--) f = f*x + poly[i];
|
||||
return f;
|
||||
}
|
||||
|
||||
// Evaluate the differential of the polynomial
|
||||
bigfloat AlgRemez::polyDiff(bigfloat x, bigfloat *poly, long size) {
|
||||
bigfloat df = (bigfloat)size*poly[size];
|
||||
for (int i=size-1; i>0; i--) df = df*x + (bigfloat)i*poly[i];
|
||||
return df;
|
||||
}
|
||||
|
||||
|
||||
// Newton's method to calculate roots
|
||||
bigfloat AlgRemez::rtnewt(bigfloat *poly, long i, bigfloat x1,
|
||||
bigfloat x2, bigfloat xacc) {
|
||||
int j;
|
||||
bigfloat df, dx, f, rtn;
|
||||
|
||||
rtn=(bigfloat)0.5*(x1+x2);
|
||||
for (j=1; j<=JMAX;j++) {
|
||||
f = polyEval(rtn, poly, i);
|
||||
df = polyDiff(rtn, poly, i);
|
||||
dx = f/df;
|
||||
rtn -= dx;
|
||||
if (abs_bf(dx) < xacc) return rtn;
|
||||
}
|
||||
std::cout<<"Maximum number of iterations exceeded in rtnewt\n";
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Evaluate the partial fraction expansion of the rational function
|
||||
// with res roots and poles poles. Result is overwritten on input
|
||||
// arrays.
|
||||
void AlgRemez::pfe(bigfloat *res, bigfloat *poles, bigfloat norm) {
|
||||
int i,j,small;
|
||||
bigfloat temp;
|
||||
bigfloat *numerator = new bigfloat[n];
|
||||
bigfloat *denominator = new bigfloat[d];
|
||||
|
||||
// Construct the polynomials explicitly
|
||||
for (i=1; i<n; i++) {
|
||||
numerator[i] = 0l;
|
||||
denominator[i] = 0l;
|
||||
}
|
||||
numerator[0]=1l;
|
||||
denominator[0]=1l;
|
||||
|
||||
for (j=0; j<n; j++) {
|
||||
for (i=n-1; i>=0; i--) {
|
||||
numerator[i] *= -res[j];
|
||||
denominator[i] *= -poles[j];
|
||||
if (i>0) {
|
||||
numerator[i] += numerator[i-1];
|
||||
denominator[i] += denominator[i-1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to proper fraction form.
|
||||
// Fraction is now in the form 1 + n/d, where O(n)+1=O(d)
|
||||
for (i=0; i<n; i++) numerator[i] -= denominator[i];
|
||||
|
||||
// Find the residues of the partial fraction expansion and absorb the
|
||||
// coefficients.
|
||||
for (i=0; i<n; i++) {
|
||||
res[i] = 0l;
|
||||
for (j=n-1; j>=0; j--) {
|
||||
res[i] = poles[i]*res[i]+numerator[j];
|
||||
}
|
||||
for (j=n-1; j>=0; j--) {
|
||||
if (i!=j) res[i] /= poles[i]-poles[j];
|
||||
}
|
||||
res[i] *= norm;
|
||||
}
|
||||
|
||||
// res now holds the residues
|
||||
j = 0;
|
||||
for (i=0; i<n; i++) poles[i] = -poles[i];
|
||||
|
||||
// Move the ordering of the poles from smallest to largest
|
||||
for (j=0; j<n; j++) {
|
||||
small = j;
|
||||
for (i=j+1; i<n; i++) {
|
||||
if (poles[i] < poles[small]) small = i;
|
||||
}
|
||||
if (small != j) {
|
||||
temp = poles[small];
|
||||
poles[small] = poles[j];
|
||||
poles[j] = temp;
|
||||
temp = res[small];
|
||||
res[small] = res[j];
|
||||
res[j] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
delete [] numerator;
|
||||
delete [] denominator;
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateApprox(double x) {
|
||||
return (double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateInverseApprox(double x) {
|
||||
return 1.0/(double)approx((bigfloat)x);
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateFunc(double x) {
|
||||
return (double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
double AlgRemez::evaluateInverseFunc(double x) {
|
||||
return 1.0/(double)func((bigfloat)x);
|
||||
}
|
||||
|
||||
void AlgRemez::csv(std::ostream & os)
|
||||
{
|
||||
double lambda_low = apstrt;
|
||||
double lambda_high= apend;
|
||||
for (double x=lambda_low; x<lambda_high; x*=1.05) {
|
||||
double f = evaluateFunc(x);
|
||||
double r = evaluateApprox(x);
|
||||
os<< x<<","<<r<<","<<f<<","<<r-f<<std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
184
Grid/algorithms/approx/Remez.h
Normal file
184
Grid/algorithms/approx/Remez.h
Normal file
@ -0,0 +1,184 @@
|
||||
/*
|
||||
|
||||
Mike Clark - 25th May 2005
|
||||
|
||||
alg_remez.h
|
||||
|
||||
AlgRemez is an implementation of the Remez algorithm, which in this
|
||||
case is used for generating the optimal nth root rational
|
||||
approximation.
|
||||
|
||||
Note this class requires the gnu multiprecision (GNU MP) library.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_ALG_REMEZ_H
|
||||
#define INCLUDED_ALG_REMEZ_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <Grid/GridStd.h>
|
||||
|
||||
#ifdef HAVE_LIBGMP
|
||||
#include "bigfloat.h"
|
||||
#else
|
||||
#include "bigfloat_double.h"
|
||||
#endif
|
||||
|
||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||
|
||||
/*
|
||||
*Usage examples
|
||||
AlgRemez remez(lambda_low,lambda_high,precision);
|
||||
error = remez.generateApprox(n,d,y,z);
|
||||
remez.getPFE(res,pole,&norm);
|
||||
remez.getIPFE(res,pole,&norm);
|
||||
remez.csv(ostream &os);
|
||||
*/
|
||||
|
||||
class AlgRemez
|
||||
{
|
||||
private:
|
||||
char *cname;
|
||||
|
||||
// The approximation parameters
|
||||
bigfloat *param, *roots, *poles;
|
||||
bigfloat norm;
|
||||
|
||||
// The numerator and denominator degree (n=d)
|
||||
int n, d;
|
||||
|
||||
// The bounds of the approximation
|
||||
bigfloat apstrt, apwidt, apend;
|
||||
|
||||
// the numerator and denominator of the power we are approximating
|
||||
unsigned long power_num;
|
||||
unsigned long power_den;
|
||||
|
||||
// Flag to determine whether the arrays have been allocated
|
||||
int alloc;
|
||||
|
||||
// Flag to determine whether the roots have been found
|
||||
int foundRoots;
|
||||
|
||||
// Variables used to calculate the approximation
|
||||
int nd1, iter;
|
||||
bigfloat *xx, *mm, *step;
|
||||
bigfloat delta, spread, tolerance;
|
||||
|
||||
// The exponential summation coefficients
|
||||
bigfloat *a;
|
||||
int *a_power;
|
||||
int a_length;
|
||||
|
||||
// The number of equations we must solve at each iteration (n+d+1)
|
||||
int neq;
|
||||
|
||||
// The precision of the GNU MP library
|
||||
long prec;
|
||||
|
||||
// Initial values of maximal and minmal errors
|
||||
void initialGuess();
|
||||
|
||||
// Solve the equations
|
||||
void equations();
|
||||
|
||||
// Search for error maxima and minima
|
||||
void search(bigfloat *step);
|
||||
|
||||
// Initialise step sizes
|
||||
void stpini(bigfloat *step);
|
||||
|
||||
// Calculate the roots of the approximation
|
||||
int root();
|
||||
|
||||
// Evaluate the polynomial
|
||||
bigfloat polyEval(bigfloat x, bigfloat *poly, long size);
|
||||
//complex_bf polyEval(complex_bf x, complex_bf *poly, long size);
|
||||
|
||||
// Evaluate the differential of the polynomial
|
||||
bigfloat polyDiff(bigfloat x, bigfloat *poly, long size);
|
||||
//complex_bf polyDiff(complex_bf x, complex_bf *poly, long size);
|
||||
|
||||
// Newton's method to calculate roots
|
||||
bigfloat rtnewt(bigfloat *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
|
||||
//complex_bf rtnewt(complex_bf *poly, long i, bigfloat x1, bigfloat x2, bigfloat xacc);
|
||||
|
||||
// Evaluate the partial fraction expansion of the rational function
|
||||
// with res roots and poles poles. Result is overwritten on input
|
||||
// arrays.
|
||||
void pfe(bigfloat *res, bigfloat* poles, bigfloat norm);
|
||||
|
||||
// Calculate function required for the approximation
|
||||
bigfloat func(bigfloat x);
|
||||
|
||||
// Compute size and sign of the approximation error at x
|
||||
bigfloat getErr(bigfloat x, int *sign);
|
||||
|
||||
// Solve the system AX=B
|
||||
int simq(bigfloat *A, bigfloat *B, bigfloat *X, int n);
|
||||
|
||||
// Free memory and reallocate as necessary
|
||||
void allocate(int num_degree, int den_degree);
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the
|
||||
// solution vector param
|
||||
bigfloat approx(bigfloat x);
|
||||
|
||||
public:
|
||||
|
||||
// Constructor
|
||||
AlgRemez(double lower, double upper, long prec);
|
||||
|
||||
// Destructor
|
||||
virtual ~AlgRemez();
|
||||
|
||||
int getDegree(void){
|
||||
assert(n==d);
|
||||
return n;
|
||||
}
|
||||
// Reset the bounds of the approximation
|
||||
void setBounds(double lower, double upper);
|
||||
// Reset the bounds of the approximation
|
||||
void getBounds(double &lower, double &upper) {
|
||||
lower=(double)apstrt;
|
||||
upper=(double)apend;
|
||||
}
|
||||
|
||||
// Generate the rational approximation x^(pnum/pden)
|
||||
double generateApprox(int num_degree, int den_degree,
|
||||
unsigned long power_num, unsigned long power_den,
|
||||
int a_len, double* a_param, int* a_pow);
|
||||
double generateApprox(int num_degree, int den_degree,
|
||||
unsigned long power_num, unsigned long power_den);
|
||||
double generateApprox(int degree, unsigned long power_num,
|
||||
unsigned long power_den);
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(pnum/pden)
|
||||
int getPFE(double *res, double *pole, double *norm);
|
||||
|
||||
// Return the partial fraction expansion of the approximation x^(-pnum/pden)
|
||||
int getIPFE(double *res, double *pole, double *norm);
|
||||
|
||||
// Evaluate the rational form P(x)/Q(x) using coefficients from the
|
||||
// solution vector param
|
||||
double evaluateApprox(double x);
|
||||
|
||||
// Evaluate the rational form Q(x)/P(x) using coefficients from the
|
||||
// solution vector param
|
||||
double evaluateInverseApprox(double x);
|
||||
|
||||
// Calculate function required for the approximation
|
||||
double evaluateFunc(double x);
|
||||
|
||||
// Calculate inverse function required for the approximation
|
||||
double evaluateInverseFunc(double x);
|
||||
|
||||
// Dump csv of function, approx and error
|
||||
void csv(std::ostream &os);
|
||||
};
|
||||
|
||||
#endif // Include guard
|
||||
|
||||
|
||||
|
730
Grid/algorithms/approx/Zolotarev.cc
Normal file
730
Grid/algorithms/approx/Zolotarev.cc
Normal file
@ -0,0 +1,730 @@
|
||||
/* -*- Mode: C; comment-column: 22; fill-column: 79; compile-command: "gcc -o zolotarev zolotarev.c -ansi -pedantic -lm -DTEST"; -*- */
|
||||
#define VERSION Source Time-stamp: <2015-05-18 16:32:08 neo>
|
||||
|
||||
/* These C routines evalute the optimal rational approximation to the signum
|
||||
* function for epsilon < |x| < 1 using Zolotarev's theorem.
|
||||
*
|
||||
* To obtain reliable results for high degree approximations (large n) it is
|
||||
* necessary to compute using sufficiently high precision arithmetic. To this
|
||||
* end the code has been parameterised to work with the preprocessor names
|
||||
* INTERNAL_PRECISION and PRECISION set to float, double, or long double as
|
||||
* appropriate. INTERNAL_PRECISION is used in computing the Zolotarev
|
||||
* coefficients, which are converted to PRECISION before being returned to the
|
||||
* caller. Presumably even higher precision could be obtained using GMP or
|
||||
* similar package, but bear in mind that rounding errors might also be
|
||||
* significant in evaluating the resulting polynomial. The convergence criteria
|
||||
* have been written in a precision-independent form. */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
#ifndef INTERNAL_PRECISION
|
||||
#define INTERNAL_PRECISION double
|
||||
#endif
|
||||
|
||||
#include "Zolotarev.h"
|
||||
#define ZOLOTAREV_INTERNAL
|
||||
#undef ZOLOTAREV_DATA
|
||||
#define ZOLOTAREV_DATA izd
|
||||
#undef ZPRECISION
|
||||
#define ZPRECISION INTERNAL_PRECISION
|
||||
#include "Zolotarev.h"
|
||||
#undef ZOLOTAREV_INTERNAL
|
||||
|
||||
/* The ANSI standard appears not to know what pi is */
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI ((INTERNAL_PRECISION) 3.141592653589793238462643383279502884197\
|
||||
169399375105820974944592307816406286208998628034825342117068)
|
||||
#endif
|
||||
|
||||
#define ZERO ((INTERNAL_PRECISION) 0)
|
||||
#define ONE ((INTERNAL_PRECISION) 1)
|
||||
#define TWO ((INTERNAL_PRECISION) 2)
|
||||
#define THREE ((INTERNAL_PRECISION) 3)
|
||||
#define FOUR ((INTERNAL_PRECISION) 4)
|
||||
#define HALF (ONE/TWO)
|
||||
|
||||
/* The following obscenity seems to be the simplest (?) way to coerce the C
|
||||
* preprocessor to convert the value of a preprocessor token into a string. */
|
||||
|
||||
#define PP2(x) #x
|
||||
#define PP1(a,b,c) a ## b(c)
|
||||
#define STRINGIFY(name) PP1(PP,2,name)
|
||||
|
||||
/* Compute the partial fraction expansion coefficients (alpha) from the
|
||||
* factored form */
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
NAMESPACE_BEGIN(Approx);
|
||||
|
||||
static void construct_partfrac(izd *z) {
|
||||
int dn = z -> dn, dd = z -> dd, type = z -> type;
|
||||
int j, k, da = dd + 1 + type;
|
||||
INTERNAL_PRECISION A = z -> A, *a = z -> a, *ap = z -> ap, *alpha;
|
||||
alpha = (INTERNAL_PRECISION*) malloc(da * sizeof(INTERNAL_PRECISION));
|
||||
for (j = 0; j < dd; j++)
|
||||
for (k = 0, alpha[j] = A; k < dd; k++)
|
||||
alpha[j] *=
|
||||
(k < dn ? ap[j] - a[k] : ONE) / (k == j ? ONE : ap[j] - ap[k]);
|
||||
if(type == 1) /* implicit pole at zero? */
|
||||
for (k = 0, alpha[dd] = A * (dn > dd ? - a[dd] : ONE); k < dd; k++) {
|
||||
alpha[dd] *= a[k] / ap[k];
|
||||
alpha[k] *= (dn > dd ? ap[k] - a[dd] : ONE) / ap[k];
|
||||
}
|
||||
alpha[da-1] = dn == da - 1 ? A : ZERO;
|
||||
z -> alpha = alpha;
|
||||
z -> da = da;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Convert factored polynomial into dense polynomial. The input is the overall
|
||||
* factor A and the roots a[i], such that p = A product(x - a[i], i = 1..d) */
|
||||
|
||||
static INTERNAL_PRECISION *poly_factored_to_dense(INTERNAL_PRECISION A,
|
||||
INTERNAL_PRECISION *a,
|
||||
int d) {
|
||||
INTERNAL_PRECISION *p;
|
||||
int i, j;
|
||||
p = (INTERNAL_PRECISION *) malloc((d + 2) * sizeof(INTERNAL_PRECISION));
|
||||
p[0] = A;
|
||||
for (i = 0; i < d; i++) {
|
||||
p[i+1] = p[i];
|
||||
for (j = i; j > 0; j--) p[j] = p[j-1] - a[i]*p[j];
|
||||
p[0] *= - a[i];
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Convert a rational function of the form R0(x) = x p(x^2)/q(x^2) (type 0) or
|
||||
* R1(x) = p(x^2)/[x q(x^2)] (type 1) into its continued fraction
|
||||
* representation. We assume that 0 <= deg(q) - deg(p) <= 1 for type 0 and 0 <=
|
||||
* deg(p) - deg(q) <= 1 for type 1. On input p and q are in factored form, and
|
||||
* deg(q) = dq, deg(p) = dp. The output is the continued fraction coefficients
|
||||
* beta, where R(x) = beta[0] x + 1/(beta[1] x + 1/(...)).
|
||||
*
|
||||
* The method used is as follows. There are four cases to consider:
|
||||
*
|
||||
* 0.i. Type 0, deg p = deg q
|
||||
*
|
||||
* 0.ii. Type 0, deg p = deg q - 1
|
||||
*
|
||||
* 1.i. Type 1, deg p = deg q
|
||||
*
|
||||
* 1.ii. Type 1, deg p = deg q + 1
|
||||
*
|
||||
* and these are connected by two transformations:
|
||||
*
|
||||
* A. To obtain a continued fraction expansion of type 1 we use a single-step
|
||||
* polynomial division we find beta and r(x) such that p(x) = beta x q(x) +
|
||||
* r(x), with deg(r) = deg(q). This implies that p(x^2) = beta x^2 q(x^2) +
|
||||
* r(x^2), and thus R1(x) = x beta + r(x^2)/(x q(x^2)) = x beta + 1/R0(x)
|
||||
* with R0(x) = x q(x^2)/r(x^2).
|
||||
*
|
||||
* B. A continued fraction expansion of type 0 is obtained in a similar, but
|
||||
* not identical, manner. We use the polynomial division algorithm to compute
|
||||
* the quotient beta and the remainder r that satisfy p(x) = beta q(x) + r(x)
|
||||
* with deg(r) = deg(q) - 1. We thus have x p(x^2) = x beta q(x^2) + x r(x^2),
|
||||
* so R0(x) = x beta + x r(x^2)/q(x^2) = x beta + 1/R1(x) with R1(x) = q(x^2) /
|
||||
* (x r(x^2)).
|
||||
*
|
||||
* Note that the deg(r) must be exactly deg(q) for (A) and deg(q) - 1 for (B)
|
||||
* because p and q have disjoint roots all of multiplicity 1. This means that
|
||||
* the division algorithm requires only a single polynomial subtraction step.
|
||||
*
|
||||
* The transformations between the cases form the following finite state
|
||||
* automaton:
|
||||
*
|
||||
* +------+ +------+ +------+ +------+
|
||||
* | | | | ---(A)---> | | | |
|
||||
* | 0.ii | ---(B)---> | 1.ii | | 0.i | <---(A)--- | 1.i |
|
||||
* | | | | <---(B)--- | | | |
|
||||
* +------+ +------+ +------+ +------+
|
||||
*/
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_A(INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *, int, int);
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_B(INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *,
|
||||
INTERNAL_PRECISION *, int, int);
|
||||
|
||||
static void construct_contfrac(izd *z){
|
||||
INTERNAL_PRECISION *r, A = z -> A, *p = z -> a, *q = z -> ap;
|
||||
int dp = z -> dn, dq = z -> dd, type = z -> type;
|
||||
|
||||
z -> db = 2 * dq + 1 + type;
|
||||
z -> beta = (INTERNAL_PRECISION *)
|
||||
malloc(z -> db * sizeof(INTERNAL_PRECISION));
|
||||
p = poly_factored_to_dense(A, p, dp);
|
||||
q = poly_factored_to_dense(ONE, q, dq);
|
||||
r = (INTERNAL_PRECISION *) malloc((MAX(dp,dq) + 1) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
if (type == 0) (void) contfrac_B(z -> beta, p, q, r, dp, dq);
|
||||
else (void) contfrac_A(z -> beta, p, q, r, dp, dq);
|
||||
free(p); free(q); free(r);
|
||||
return;
|
||||
}
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_A(INTERNAL_PRECISION *beta,
|
||||
INTERNAL_PRECISION *p,
|
||||
INTERNAL_PRECISION *q,
|
||||
INTERNAL_PRECISION *r, int dp, int dq) {
|
||||
INTERNAL_PRECISION quot, *rb;
|
||||
int j;
|
||||
|
||||
/* p(x) = x beta q(x) + r(x); dp = dq or dp = dq + 1 */
|
||||
|
||||
quot = dp == dq ? ZERO : p[dp] / q[dq];
|
||||
r[0] = p[0];
|
||||
for (j = 1; j <= dp; j++) r[j] = p[j] - quot * q[j-1];
|
||||
#ifdef DEBUG
|
||||
printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
|
||||
__FUNCTION__, dp, dq, (float) quot);
|
||||
for (j = 0; j <= dq + 1; j++)
|
||||
printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
|
||||
j, (float) (j > dp ? ZERO : p[j]),
|
||||
j, (float) (j == 0 ? ZERO : q[j-1]),
|
||||
j, (float) (j == dp ? ZERO : r[j]));
|
||||
#endif /* DEBUG */
|
||||
*(rb = contfrac_B(beta, q, r, p, dq, dq)) = quot;
|
||||
return rb + 1;
|
||||
}
|
||||
|
||||
static INTERNAL_PRECISION *contfrac_B(INTERNAL_PRECISION *beta,
|
||||
INTERNAL_PRECISION *p,
|
||||
INTERNAL_PRECISION *q,
|
||||
INTERNAL_PRECISION *r, int dp, int dq) {
|
||||
INTERNAL_PRECISION quot, *rb;
|
||||
int j;
|
||||
|
||||
/* p(x) = beta q(x) + r(x); dp = dq or dp = dq - 1 */
|
||||
|
||||
quot = dp == dq ? p[dp] / q[dq] : ZERO;
|
||||
for (j = 0; j < dq; j++) r[j] = p[j] - quot * q[j];
|
||||
#ifdef DEBUG
|
||||
printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
|
||||
__FUNCTION__, dp, dq, (float) quot);
|
||||
for (j = 0; j <= dq; j++)
|
||||
printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
|
||||
j, (float) (j > dp ? ZERO : p[j]),
|
||||
j, (float) q[j],
|
||||
j, (float) (j == dq ? ZERO : r[j]));
|
||||
#endif /* DEBUG */
|
||||
*(rb = dq > 0 ? contfrac_A(beta, q, r, p, dq, dq-1) : beta) = quot;
|
||||
return rb + 1;
|
||||
}
|
||||
|
||||
/* The global variable U is used to hold the argument u throughout the AGM
|
||||
* recursion. The global variables F and K are set in the innermost
|
||||
* instantiation of the recursive function AGM to the values of the elliptic
|
||||
* integrals F(u,k) and K(k) respectively. They must be made thread local to
|
||||
* make this code thread-safe in a multithreaded environment. */
|
||||
|
||||
static INTERNAL_PRECISION U, F, K; /* THREAD LOCAL */
|
||||
|
||||
/* Recursive implementation of Gauss' arithmetico-geometric mean, which is the
|
||||
* kernel of the method used to compute the Jacobian elliptic functions
|
||||
* sn(u,k), cn(u,k), and dn(u,k) with parameter k (where 0 < k < 1), as well
|
||||
* as the elliptic integral F(s,k) satisfying F(sn(u,k)) = u and the complete
|
||||
* elliptic integral K(k).
|
||||
*
|
||||
* The algorithm used is a recursive implementation of the Gauss (Landen)
|
||||
* transformation.
|
||||
*
|
||||
* The function returns the value of sn(u,k'), where k' is the dual parameter,
|
||||
* and also sets the values of the global variables F and K. The latter is
|
||||
* used to determine the sign of cn(u,k').
|
||||
*
|
||||
* The algorithm is deemed to have converged when b ceases to increase. This
|
||||
* works whatever INTERNAL_PRECISION is specified. */
|
||||
|
||||
static INTERNAL_PRECISION AGM(INTERNAL_PRECISION a,
|
||||
INTERNAL_PRECISION b,
|
||||
INTERNAL_PRECISION s) {
|
||||
static INTERNAL_PRECISION pb = -ONE;
|
||||
INTERNAL_PRECISION c, d, xi;
|
||||
|
||||
if (b <= pb) {
|
||||
pb = -ONE;
|
||||
F = asin(s) / a; /* Here, a is the AGM */
|
||||
K = M_PI / (TWO * a);
|
||||
return sin(U * a);
|
||||
}
|
||||
pb = b;
|
||||
c = a - b;
|
||||
d = a + b;
|
||||
xi = AGM(HALF*d, sqrt(a*b), ONE + c*c == ONE ?
|
||||
HALF*s*d/a : (a - sqrt(a*a - s*s*c*d))/(c*s));
|
||||
return 2*a*xi / (d + c*xi*xi);
|
||||
}
|
||||
|
||||
/* Computes sn(u,k), cn(u,k), dn(u,k), F(u,k), and K(k). It is essentially a
|
||||
* wrapper for the routine AGM. The sign of cn(u,k) is defined to be -1 if
|
||||
* K(k) < u < 3*K(k) and +1 otherwise, and thus sign is computed by some quite
|
||||
* unnecessarily obfuscated bit manipulations. */
|
||||
|
||||
static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k,
|
||||
INTERNAL_PRECISION* sn, INTERNAL_PRECISION* cn,
|
||||
INTERNAL_PRECISION* dn, INTERNAL_PRECISION* elF,
|
||||
INTERNAL_PRECISION* elK) {
|
||||
int sgn;
|
||||
U = u;
|
||||
*sn = AGM(ONE, sqrt(ONE - k*k), u);
|
||||
sgn = ((int) (fabs(u) / K)) % 4; /* sgn = 0, 1, 2, 3 */
|
||||
sgn ^= sgn >> 1; /* (sgn & 1) = 0, 1, 1, 0 */
|
||||
sgn = 1 - ((sgn & 1) << 1); /* sgn = 1, -1, -1, 1 */
|
||||
*cn = ((INTERNAL_PRECISION) sgn) * sqrt(ONE - *sn * *sn);
|
||||
*dn = sqrt(ONE - k*k* *sn * *sn);
|
||||
*elF = F;
|
||||
*elK = K;
|
||||
}
|
||||
|
||||
/* Compute the coefficients for the optimal rational approximation R(x) to
|
||||
* sgn(x) of degree n over the interval epsilon < |x| < 1 using Zolotarev's
|
||||
* formula.
|
||||
*
|
||||
* Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
|
||||
* type = 1 for the approximation which is infinite at x = 0. */
|
||||
|
||||
zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
|
||||
INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
|
||||
l, invlambda, xi, xisq, *tv, s, opl;
|
||||
int m, czero, ts;
|
||||
zolotarev_data *zd;
|
||||
izd *d = (izd*) malloc(sizeof(izd));
|
||||
|
||||
d -> type = type;
|
||||
d -> epsilon = (INTERNAL_PRECISION) epsilon;
|
||||
d -> n = n;
|
||||
d -> dd = n / 2;
|
||||
d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
|
||||
d -> deg_denom = 2 * d -> dd;
|
||||
d -> deg_num = 2 * d -> dn + 1;
|
||||
|
||||
d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
ksq = d -> epsilon * d -> epsilon;
|
||||
kp = sqrt(ONE - ksq);
|
||||
sncndnFK(ZERO, kp, &sn, &cn, &dn, &F, &Kp); /* compute Kp = K(kp) */
|
||||
z0 = TWO * Kp / (INTERNAL_PRECISION) n;
|
||||
M = ONE;
|
||||
A = ONE / d -> epsilon;
|
||||
|
||||
sncndnFK(HALF * z0, kp, &sn, &cn, &dn, &F, &Kj); /* compute xi */
|
||||
xi = ONE / dn;
|
||||
xisq = xi * xi;
|
||||
invlambda = xi;
|
||||
|
||||
for (m = 0; m < d -> dd; m++) {
|
||||
czero = 2 * (m + 1) == n; /* n even and m = dd -1 */
|
||||
|
||||
z = z0 * ((INTERNAL_PRECISION) m + ONE);
|
||||
sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
|
||||
t = cn / sn;
|
||||
c = - t*t;
|
||||
if (!czero) (d -> a)[d -> dn - 1 - m] = ksq / c;
|
||||
|
||||
z = z0 * ((INTERNAL_PRECISION) m + HALF);
|
||||
sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
|
||||
t = cn / sn;
|
||||
cp = - t*t;
|
||||
(d -> ap)[d -> dd - 1 - m] = ksq / cp;
|
||||
|
||||
M *= (ONE - c) / (ONE - cp);
|
||||
A *= (czero ? -ksq : c) * (ONE - cp) / (cp * (ONE - c));
|
||||
invlambda *= (ONE - c*xisq) / (ONE - cp*xisq);
|
||||
}
|
||||
invlambda /= M;
|
||||
d -> A = TWO / (ONE + invlambda) * A;
|
||||
d -> Delta = (invlambda - ONE) / (invlambda + ONE);
|
||||
|
||||
d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
l = ONE / invlambda;
|
||||
opl = ONE + l;
|
||||
sncndnFK(sqrt( d -> type == 1
|
||||
? (THREE + l) / (FOUR * opl)
|
||||
: (ONE + THREE*l) / (opl*opl*opl)
|
||||
), sqrt(ONE - l*l), &sn, &cn, &dn, &F, &Kj);
|
||||
s = M * F;
|
||||
for (m = 0; m < d -> n; m++) {
|
||||
sncndnFK(s + TWO*Kp*m/n, kp, &sn, &cn, &dn, &F, &Kj);
|
||||
d -> gamma[m] = d -> epsilon / dn;
|
||||
}
|
||||
|
||||
/* If R(x) is a Zolotarev rational approximation of degree (n,m) with maximum
|
||||
* error Delta, then (1 - Delta^2) / R(x) is also an optimal Chebyshev
|
||||
* approximation of degree (m,n) */
|
||||
|
||||
if (d -> type == 1) {
|
||||
d -> A = (ONE - d -> Delta * d -> Delta) / d -> A;
|
||||
tv = d -> a; d -> a = d -> ap; d -> ap = tv;
|
||||
ts = d -> dn; d -> dn = d -> dd; d -> dd = ts;
|
||||
ts = d -> deg_num; d -> deg_num = d -> deg_denom; d -> deg_denom = ts;
|
||||
}
|
||||
|
||||
construct_partfrac(d);
|
||||
construct_contfrac(d);
|
||||
|
||||
/* Converting everything to PRECISION for external use only */
|
||||
|
||||
zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
|
||||
zd -> A = (PRECISION) d -> A;
|
||||
zd -> Delta = (PRECISION) d -> Delta;
|
||||
zd -> epsilon = (PRECISION) d -> epsilon;
|
||||
zd -> n = d -> n;
|
||||
zd -> type = d -> type;
|
||||
zd -> dn = d -> dn;
|
||||
zd -> dd = d -> dd;
|
||||
zd -> da = d -> da;
|
||||
zd -> db = d -> db;
|
||||
zd -> deg_num = d -> deg_num;
|
||||
zd -> deg_denom = d -> deg_denom;
|
||||
|
||||
zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
|
||||
free(d -> a);
|
||||
|
||||
zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
|
||||
free(d -> ap);
|
||||
|
||||
zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
|
||||
free(d -> alpha);
|
||||
|
||||
zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
|
||||
free(d -> beta);
|
||||
|
||||
zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
|
||||
free(d -> gamma);
|
||||
|
||||
free(d);
|
||||
return zd;
|
||||
}
|
||||
|
||||
|
||||
void zolotarev_free(zolotarev_data *zdata)
|
||||
{
|
||||
free(zdata -> a);
|
||||
free(zdata -> ap);
|
||||
free(zdata -> alpha);
|
||||
free(zdata -> beta);
|
||||
free(zdata -> gamma);
|
||||
free(zdata);
|
||||
}
|
||||
|
||||
|
||||
zolotarev_data* higham(PRECISION epsilon, int n) {
|
||||
INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
|
||||
int m, czero;
|
||||
zolotarev_data *zd;
|
||||
izd *d = (izd*) malloc(sizeof(izd));
|
||||
|
||||
d -> type = 0;
|
||||
d -> epsilon = (INTERNAL_PRECISION) epsilon;
|
||||
d -> n = n;
|
||||
d -> dd = n / 2;
|
||||
d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
|
||||
d -> deg_denom = 2 * d -> dd;
|
||||
d -> deg_num = 2 * d -> dn + 1;
|
||||
|
||||
d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
A = (INTERNAL_PRECISION) n;
|
||||
z0 = M_PI / A;
|
||||
A = n % 2 == 0 ? A : ONE / A;
|
||||
M = d -> epsilon * A;
|
||||
epssq = d -> epsilon * d -> epsilon;
|
||||
|
||||
for (m = 0; m < d -> dd; m++) {
|
||||
czero = 2 * (m + 1) == n; /* n even and m = dd - 1*/
|
||||
|
||||
if (!czero) {
|
||||
z = z0 * ((INTERNAL_PRECISION) m + ONE);
|
||||
t = tan(z);
|
||||
c = - t*t;
|
||||
(d -> a)[d -> dn - 1 - m] = c;
|
||||
M *= epssq - c;
|
||||
}
|
||||
|
||||
z = z0 * ((INTERNAL_PRECISION) m + HALF);
|
||||
t = tan(z);
|
||||
cp = - t*t;
|
||||
(d -> ap)[d -> dd - 1 - m] = cp;
|
||||
M /= epssq - cp;
|
||||
}
|
||||
|
||||
d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
|
||||
sizeof(INTERNAL_PRECISION));
|
||||
for (m = 0; m < d -> n; m++) d -> gamma[m] = ONE;
|
||||
|
||||
d -> A = A;
|
||||
d -> Delta = ONE - M;
|
||||
|
||||
construct_partfrac(d);
|
||||
construct_contfrac(d);
|
||||
|
||||
/* Converting everything to PRECISION for external use only */
|
||||
|
||||
zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
|
||||
zd -> A = (PRECISION) d -> A;
|
||||
zd -> Delta = (PRECISION) d -> Delta;
|
||||
zd -> epsilon = (PRECISION) d -> epsilon;
|
||||
zd -> n = d -> n;
|
||||
zd -> type = d -> type;
|
||||
zd -> dn = d -> dn;
|
||||
zd -> dd = d -> dd;
|
||||
zd -> da = d -> da;
|
||||
zd -> db = d -> db;
|
||||
zd -> deg_num = d -> deg_num;
|
||||
zd -> deg_denom = d -> deg_denom;
|
||||
|
||||
zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
|
||||
free(d -> a);
|
||||
|
||||
zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
|
||||
free(d -> ap);
|
||||
|
||||
zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
|
||||
free(d -> alpha);
|
||||
|
||||
zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
|
||||
free(d -> beta);
|
||||
|
||||
zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
|
||||
for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
|
||||
free(d -> gamma);
|
||||
|
||||
free(d);
|
||||
return zd;
|
||||
}
|
||||
|
||||
NAMESPACE_END(Approx);
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#ifdef TEST
|
||||
|
||||
#undef ZERO
|
||||
#define ZERO ((PRECISION) 0)
|
||||
#undef ONE
|
||||
#define ONE ((PRECISION) 1)
|
||||
#undef TWO
|
||||
#define TWO ((PRECISION) 2)
|
||||
|
||||
/* Evaluate the rational approximation R(x) using the factored form */
|
||||
|
||||
static PRECISION zolotarev_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION R;
|
||||
|
||||
if (rdata -> type == 0) {
|
||||
R = rdata -> A * x;
|
||||
for (m = 0; m < rdata -> deg_denom/2; m++)
|
||||
R *= (2*(m+1) > rdata -> deg_num ? ONE : x*x - rdata -> a[m]) /
|
||||
(x*x - rdata -> ap[m]);
|
||||
} else {
|
||||
R = rdata -> A / x;
|
||||
for (m = 0; m < rdata -> deg_num/2; m++)
|
||||
R *= (x*x - rdata -> a[m]) /
|
||||
(2*(m+1) > rdata -> deg_denom ? ONE : x*x - rdata -> ap[m]);
|
||||
}
|
||||
return R;
|
||||
}
|
||||
|
||||
/* Evaluate the rational approximation R(x) using the partial fraction form */
|
||||
|
||||
static PRECISION zolotarev_partfrac_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION R = rdata -> alpha[rdata -> da - 1];
|
||||
for (m = 0; m < rdata -> dd; m++)
|
||||
R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
|
||||
if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
|
||||
return R * x;
|
||||
}
|
||||
|
||||
/* Evaluate the rational approximation R(x) using continued fraction form.
|
||||
*
|
||||
* If x = 0 and type = 1 then the result should be INF, whereas if x = 0 and
|
||||
* type = 0 then the result should be 0, but division by zero will occur at
|
||||
* intermediate stages of the evaluation. For IEEE implementations with
|
||||
* non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
|
||||
* but with signalling overflow you will get an error message. */
|
||||
|
||||
static PRECISION zolotarev_contfrac_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION R = rdata -> beta[0] * x;
|
||||
for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
|
||||
return R;
|
||||
}
|
||||
|
||||
/* Evaluate the rational approximation R(x) using Cayley form */
|
||||
|
||||
static PRECISION zolotarev_cayley_eval(PRECISION x, zolotarev_data* rdata) {
|
||||
int m;
|
||||
PRECISION T;
|
||||
|
||||
T = rdata -> type == 0 ? ONE : -ONE;
|
||||
for (m = 0; m < rdata -> n; m++)
|
||||
T *= (rdata -> gamma[m] - x) / (rdata -> gamma[m] + x);
|
||||
return (ONE - T) / (ONE + T);
|
||||
}
|
||||
|
||||
|
||||
/* Test program. Apart from printing out the parameters for R(x) it produces
|
||||
* the following data files for plotting (unless NPLOT is defined):
|
||||
*
|
||||
* zolotarev-fn is a plot of R(x) for |x| < 1.2. This should look like sgn(x).
|
||||
*
|
||||
* zolotarev-err is a plot of the error |R(x) - sgn(x)| scaled by 1/Delta. This
|
||||
* should oscillate deg_num + deg_denom + 2 times between +1 and -1 over the
|
||||
* domain epsilon <= |x| <= 1.
|
||||
*
|
||||
* If ALLPLOTS is defined then zolotarev-partfrac (zolotarev-contfrac) is a
|
||||
* plot of the difference between the values of R(x) computed using the
|
||||
* factored form and the partial fraction (continued fraction) form, scaled by
|
||||
* 1/Delta. It should be zero everywhere. */
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
int m, n, plotpts = 5000, type = 0;
|
||||
float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
|
||||
zolotarev_data *rdata;
|
||||
PRECISION y;
|
||||
FILE *plot_function, *plot_error,
|
||||
*plot_partfrac, *plot_contfrac, *plot_cayley;
|
||||
|
||||
if (argc < 3 || argc > 4) {
|
||||
fprintf(stderr, "Usage: %s epsilon n [type]\n", *argv);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
sscanf(argv[1], "%g", &eps); /* First argument is epsilon */
|
||||
sscanf(argv[2], "%d", &n); /* Second argument is n */
|
||||
if (argc == 4) sscanf(argv[3], "%d", &type); /* Third argument is type */
|
||||
|
||||
if (type < 0 || type > 2) {
|
||||
fprintf(stderr, "%s: type must be 0 (Zolotarev R(0) = 0),\n"
|
||||
"\t\t1 (Zolotarev R(0) = Inf, or 2 (Higham)\n", *argv);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
rdata = type == 2
|
||||
? higham((PRECISION) eps, n)
|
||||
: zolotarev((PRECISION) eps, n, type);
|
||||
|
||||
printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t"
|
||||
STRINGIFY(VERSION) "\n\t" STRINGIFY(HVERSION)
|
||||
"\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
|
||||
"\tPRECISION = " STRINGIFY(PRECISION)
|
||||
"\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
|
||||
"\tDelta = %g (maximum error)\n\n"
|
||||
"\tA = %g (overall factor)\n",
|
||||
(float) rdata -> epsilon, rdata -> n, type,
|
||||
rdata -> deg_num, rdata -> deg_denom,
|
||||
rdata -> type == 1 ? "infinite" : "zero",
|
||||
(float) rdata -> Delta, (float) rdata -> A);
|
||||
for (m = 0; m < MIN(rdata -> dd, rdata -> dn); m++)
|
||||
printf("\ta[%2d] = %14.8g\t\ta'[%2d] = %14.8g\n",
|
||||
m + 1, (float) rdata -> a[m], m + 1, (float) rdata -> ap[m]);
|
||||
if (rdata -> dd > rdata -> dn)
|
||||
printf("\t\t\t\t\ta'[%2d] = %14.8g\n",
|
||||
rdata -> dn + 1, (float) rdata -> ap[rdata -> dn]);
|
||||
if (rdata -> dd < rdata -> dn)
|
||||
printf("\ta[%2d] = %14.8g\n",
|
||||
rdata -> dd + 1, (float) rdata -> a[rdata -> dd]);
|
||||
|
||||
printf("\n\tPartial fraction coefficients\n");
|
||||
printf("\talpha[ 0] = %14.8g\n",
|
||||
(float) rdata -> alpha[rdata -> da - 1]);
|
||||
for (m = 0; m < rdata -> dd; m++)
|
||||
printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
|
||||
m + 1, (float) rdata -> alpha[m], m + 1, (float) rdata -> ap[m]);
|
||||
if (rdata -> type == 1)
|
||||
printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
|
||||
rdata -> dd + 1, (float) rdata -> alpha[rdata -> dd],
|
||||
rdata -> dd + 1, (float) ZERO);
|
||||
|
||||
printf("\n\tContinued fraction coefficients\n");
|
||||
for (m = 0; m < rdata -> db; m++)
|
||||
printf("\tbeta[%2d] = %14.8g\n", m, (float) rdata -> beta[m]);
|
||||
|
||||
printf("\n\tCayley transform coefficients\n");
|
||||
for (m = 0; m < rdata -> n; m++)
|
||||
printf("\tgamma[%2d] = %14.8g\n", m, (float) rdata -> gamma[m]);
|
||||
|
||||
#ifndef NPLOT
|
||||
plot_function = fopen("zolotarev-fn.dat", "w");
|
||||
plot_error = fopen("zolotarev-err.dat", "w");
|
||||
#ifdef ALLPLOTS
|
||||
plot_partfrac = fopen("zolotarev-partfrac.dat", "w");
|
||||
plot_contfrac = fopen("zolotarev-contfrac.dat", "w");
|
||||
plot_cayley = fopen("zolotarev-cayley.dat", "w");
|
||||
#endif /* ALLPLOTS */
|
||||
for (m = 0, maxypferr = maxycferr = maxycaylerr = 0.0; m <= plotpts; m++) {
|
||||
x = 2.4 * (float) m / plotpts - 1.2;
|
||||
if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
|
||||
/* skip x = 0 for type 1, as R(0) is singular */
|
||||
y = zolotarev_eval((PRECISION) x, rdata);
|
||||
fprintf(plot_function, "%g %g\n", x, (float) y);
|
||||
fprintf(plot_error, "%g %g\n",
|
||||
x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
|
||||
ypferr = (float)((zolotarev_partfrac_eval((PRECISION) x, rdata) - y)
|
||||
/ rdata -> Delta);
|
||||
ycferr = (float)((zolotarev_contfrac_eval((PRECISION) x, rdata) - y)
|
||||
/ rdata -> Delta);
|
||||
ycaylerr = (float)((zolotarev_cayley_eval((PRECISION) x, rdata) - y)
|
||||
/ rdata -> Delta);
|
||||
if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
|
||||
maxypferr = MAX(maxypferr, fabs(ypferr));
|
||||
maxycferr = MAX(maxycferr, fabs(ycferr));
|
||||
maxycaylerr = MAX(maxycaylerr, fabs(ycaylerr));
|
||||
}
|
||||
#ifdef ALLPLOTS
|
||||
fprintf(plot_partfrac, "%g %g\n", x, ypferr);
|
||||
fprintf(plot_contfrac, "%g %g\n", x, ycferr);
|
||||
fprintf(plot_cayley, "%g %g\n", x, ycaylerr);
|
||||
#endif /* ALLPLOTS */
|
||||
}
|
||||
}
|
||||
#ifdef ALLPLOTS
|
||||
fclose(plot_cayley);
|
||||
fclose(plot_contfrac);
|
||||
fclose(plot_partfrac);
|
||||
#endif /* ALLPLOTS */
|
||||
fclose(plot_error);
|
||||
fclose(plot_function);
|
||||
|
||||
printf("\n\tMaximum PF error = %g (relative to Delta)\n", maxypferr);
|
||||
printf("\tMaximum CF error = %g (relative to Delta)\n", maxycferr);
|
||||
printf("\tMaximum Cayley error = %g (relative to Delta)\n", maxycaylerr);
|
||||
#endif /* NPLOT */
|
||||
|
||||
free(rdata -> a);
|
||||
free(rdata -> ap);
|
||||
free(rdata -> alpha);
|
||||
free(rdata -> beta);
|
||||
free(rdata -> gamma);
|
||||
free(rdata);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
#endif /* TEST */
|
||||
|
88
Grid/algorithms/approx/Zolotarev.h
Normal file
88
Grid/algorithms/approx/Zolotarev.h
Normal file
@ -0,0 +1,88 @@
|
||||
/* -*- Mode: C; comment-column: 22; fill-column: 79; -*- */
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <Grid/Namespace.h>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
NAMESPACE_BEGIN(Approx);
|
||||
#endif
|
||||
|
||||
#define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY>
|
||||
|
||||
#ifndef ZOLOTAREV_INTERNAL
|
||||
#ifndef PRECISION
|
||||
#define PRECISION double
|
||||
#endif
|
||||
#define ZPRECISION PRECISION
|
||||
#define ZOLOTAREV_DATA zolotarev_data
|
||||
#endif
|
||||
|
||||
/* This struct contains the coefficients which parameterise an optimal rational
|
||||
* approximation to the signum function.
|
||||
*
|
||||
* The parameterisations used are:
|
||||
*
|
||||
* Factored form for type 0 (R0(0) = 0)
|
||||
*
|
||||
* R0(x) = A * x * prod(x^2 - a[j], j = 0 .. dn-1) / prod(x^2 - ap[j], j = 0
|
||||
* .. dd-1),
|
||||
*
|
||||
* where deg_num = 2*dn + 1 and deg_denom = 2*dd.
|
||||
*
|
||||
* Factored form for type 1 (R1(0) = infinity)
|
||||
*
|
||||
* R1(x) = (A / x) * prod(x^2 - a[j], j = 0 .. dn-1) / prod(x^2 - ap[j], j = 0
|
||||
* .. dd-1),
|
||||
*
|
||||
* where deg_num = 2*dn and deg_denom = 2*dd + 1.
|
||||
*
|
||||
* Partial fraction form
|
||||
*
|
||||
* R(x) = alpha[da] * x + sum(alpha[j] * x / (x^2 - ap[j]), j = 0 .. da-1)
|
||||
*
|
||||
* where da = dd for type 0 and da = dd + 1 with ap[dd] = 0 for type 1.
|
||||
*
|
||||
* Continued fraction form
|
||||
*
|
||||
* R(x) = beta[db-1] * x + 1 / (beta[db-2] * x + 1 / (beta[db-3] * x + ...))
|
||||
*
|
||||
* with the final coefficient being beta[0], with d' = 2 * dd + 1 for type 0
|
||||
* and db = 2 * dd + 2 for type 1.
|
||||
*
|
||||
* Cayley form (Chiu's domain wall formulation)
|
||||
*
|
||||
* R(x) = (1 - T(x)) / (1 + T(x))
|
||||
*
|
||||
* where T(x) = prod((x - gamma[j]) / (x + gamma[j]), j = 0 .. n-1)
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
ZPRECISION *a, /* zeros of numerator, a[0 .. dn-1] */
|
||||
*ap, /* poles (zeros of denominator), ap[0 .. dd-1] */
|
||||
A, /* overall factor */
|
||||
*alpha, /* coefficients of partial fraction, alpha[0 .. da-1] */
|
||||
*beta, /* coefficients of continued fraction, beta[0 .. db-1] */
|
||||
*gamma, /* zeros of numerator of T in Cayley form */
|
||||
Delta, /* maximum error, |R(x) - sgn(x)| <= Delta */
|
||||
epsilon; /* minimum x value, epsilon < |x| < 1 */
|
||||
int n, /* approximation degree */
|
||||
type, /* 0: R(0) = 0, 1: R(0) = infinity */
|
||||
dn, dd, da, db, /* number of elements of a, ap, alpha, and beta */
|
||||
deg_num, /* degree of numerator = deg_denom +/- 1 */
|
||||
deg_denom; /* degree of denominator */
|
||||
} ZOLOTAREV_DATA;
|
||||
|
||||
#ifndef ZOLOTAREV_INTERNAL
|
||||
|
||||
/* zolotarev(epsilon, n, type) returns a pointer to an initialised
|
||||
* zolotarev_data structure. The arguments must satisfy the constraints that
|
||||
* epsilon > 0, n > 0, and type = 0 or 1. */
|
||||
|
||||
ZOLOTAREV_DATA* higham(PRECISION epsilon, int n) ;
|
||||
ZOLOTAREV_DATA* zolotarev(PRECISION epsilon, int n, int type);
|
||||
void zolotarev_free(zolotarev_data *zdata);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
NAMESPACE_END(Approx);
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
206
Grid/algorithms/approx/bigfloat.h
Normal file
206
Grid/algorithms/approx/bigfloat.h
Normal file
@ -0,0 +1,206 @@
|
||||
/*
|
||||
Mike Clark - 25th May 2005
|
||||
|
||||
bigfloat.h
|
||||
|
||||
Simple C++ wrapper for multiprecision datatype used by AlgRemez
|
||||
algorithm
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_BIGFLOAT_H
|
||||
#define INCLUDED_BIGFLOAT_H
|
||||
|
||||
|
||||
#include <gmp.h>
|
||||
#include <mpf2mpfr.h>
|
||||
#include <mpfr.h>
|
||||
class bigfloat {
|
||||
|
||||
private:
|
||||
|
||||
mpf_t x;
|
||||
|
||||
public:
|
||||
|
||||
bigfloat() { mpf_init(x); }
|
||||
bigfloat(const bigfloat& y) { mpf_init_set(x, y.x); }
|
||||
bigfloat(const unsigned long u) { mpf_init_set_ui(x, u); }
|
||||
bigfloat(const long i) { mpf_init_set_si(x, i); }
|
||||
bigfloat(const int i) {mpf_init_set_si(x,(long)i);}
|
||||
bigfloat(const float d) { mpf_init_set_d(x, (double)d); }
|
||||
bigfloat(const double d) { mpf_init_set_d(x, d); }
|
||||
bigfloat(const char *str) { mpf_init_set_str(x, (char*)str, 10); }
|
||||
~bigfloat(void) { mpf_clear(x); }
|
||||
operator double (void) const { return (double)mpf_get_d(x); }
|
||||
static void setDefaultPrecision(unsigned long dprec) {
|
||||
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
||||
mpf_set_default_prec(bprec);
|
||||
}
|
||||
|
||||
void setPrecision(unsigned long dprec) {
|
||||
unsigned long bprec = (unsigned long)(3.321928094 * (double)dprec);
|
||||
mpf_set_prec(x,bprec);
|
||||
}
|
||||
|
||||
unsigned long getPrecision(void) const { return mpf_get_prec(x); }
|
||||
|
||||
unsigned long getDefaultPrecision(void) const { return mpf_get_default_prec(); }
|
||||
|
||||
bigfloat& operator=(const bigfloat& y) {
|
||||
mpf_set(x, y.x);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const unsigned long y) {
|
||||
mpf_set_ui(x, y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const signed long y) {
|
||||
mpf_set_si(x, y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const float y) {
|
||||
mpf_set_d(x, (double)y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bigfloat& operator=(const double y) {
|
||||
mpf_set_d(x, y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
size_t write(void);
|
||||
size_t read(void);
|
||||
|
||||
/* Arithmetic Functions */
|
||||
|
||||
bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
|
||||
bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
|
||||
bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
|
||||
bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_add(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
mpf_add_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_sub(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_ui_sub(a.x,x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
mpf_sub_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x) {
|
||||
bigfloat a;
|
||||
mpf_neg(a.x,x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
mpf_mul(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
mpf_mul_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
|
||||
bigfloat a;
|
||||
mpf_div(a.x,x.x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const unsigned long x, const bigfloat& y){
|
||||
bigfloat a;
|
||||
mpf_ui_div(a.x,x,y.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const unsigned long y){
|
||||
bigfloat a;
|
||||
mpf_div_ui(a.x,x.x,y);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
mpf_sqrt(a.x,x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const unsigned long x){
|
||||
bigfloat a;
|
||||
mpf_sqrt_ui(a.x,x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat abs_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
mpf_abs(a.x,x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, long power) {
|
||||
bigfloat b;
|
||||
mpf_pow_ui(b.x,a.x,power);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
|
||||
bigfloat b;
|
||||
mpfr_pow(b.x,a.x,power.x,GMP_RNDN);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat exp_bf(const bigfloat& a) {
|
||||
bigfloat b;
|
||||
mpfr_exp(b.x,a.x,GMP_RNDN);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Comparison Functions */
|
||||
|
||||
friend int operator>(const bigfloat& x, const bigfloat& y) {
|
||||
int test;
|
||||
test = mpf_cmp(x.x,y.x);
|
||||
if (test > 0) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
friend int operator<(const bigfloat& x, const bigfloat& y) {
|
||||
int test;
|
||||
test = mpf_cmp(x.x,y.x);
|
||||
if (test < 0) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
friend int sgn(const bigfloat&);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
189
Grid/algorithms/approx/bigfloat_double.h
Normal file
189
Grid/algorithms/approx/bigfloat_double.h
Normal file
@ -0,0 +1,189 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/approx/bigfloat_double.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <math.h>
|
||||
|
||||
typedef double mfloat;
|
||||
class bigfloat {
|
||||
private:
|
||||
|
||||
mfloat x;
|
||||
|
||||
public:
|
||||
|
||||
bigfloat() { }
|
||||
bigfloat(const bigfloat& y) { x=y.x; }
|
||||
bigfloat(const unsigned long u) { x=u; }
|
||||
bigfloat(const long i) { x=i; }
|
||||
bigfloat(const int i) { x=i;}
|
||||
bigfloat(const float d) { x=d;}
|
||||
bigfloat(const double d) { x=d;}
|
||||
bigfloat(const char *str) { x=std::stod(std::string(str));}
|
||||
~bigfloat(void) { }
|
||||
operator double (void) const { return (double)x; }
|
||||
static void setDefaultPrecision(unsigned long dprec) {
|
||||
}
|
||||
|
||||
void setPrecision(unsigned long dprec) {
|
||||
}
|
||||
|
||||
unsigned long getPrecision(void) const { return 64; }
|
||||
unsigned long getDefaultPrecision(void) const { return 64; }
|
||||
|
||||
bigfloat& operator=(const bigfloat& y) { x=y.x; return *this; }
|
||||
bigfloat& operator=(const unsigned long y) { x=y; return *this; }
|
||||
bigfloat& operator=(const signed long y) { x=y; return *this; }
|
||||
bigfloat& operator=(const float y) { x=y; return *this; }
|
||||
bigfloat& operator=(const double y) { x=y; return *this; }
|
||||
|
||||
size_t write(void);
|
||||
size_t read(void);
|
||||
|
||||
/* Arithmetic Functions */
|
||||
|
||||
bigfloat& operator+=(const bigfloat& y) { return *this = *this + y; }
|
||||
bigfloat& operator-=(const bigfloat& y) { return *this = *this - y; }
|
||||
bigfloat& operator*=(const bigfloat& y) { return *this = *this * y; }
|
||||
bigfloat& operator/=(const bigfloat& y) { return *this = *this / y; }
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
a.x=x.x+y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator+(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
a.x=x.x+y;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
a.x=x.x-y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const unsigned long x, const bigfloat& y) {
|
||||
bigfloat bx(x);
|
||||
return bx-y;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat by(y);
|
||||
return x-by;
|
||||
}
|
||||
|
||||
friend bigfloat operator-(const bigfloat& x) {
|
||||
bigfloat a;
|
||||
a.x=-x.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const bigfloat& y) {
|
||||
bigfloat a;
|
||||
a.x=x.x*y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator*(const bigfloat& x, const unsigned long y) {
|
||||
bigfloat a;
|
||||
a.x=x.x*y;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const bigfloat& y){
|
||||
bigfloat a;
|
||||
a.x=x.x/y.x;
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const unsigned long x, const bigfloat& y){
|
||||
bigfloat bx(x);
|
||||
return bx/y;
|
||||
}
|
||||
|
||||
friend bigfloat operator/(const bigfloat& x, const unsigned long y){
|
||||
bigfloat by(y);
|
||||
return x/by;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
a.x= sqrt(x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat sqrt_bf(const unsigned long x){
|
||||
bigfloat a(x);
|
||||
return sqrt_bf(a);
|
||||
}
|
||||
|
||||
friend bigfloat abs_bf(const bigfloat& x){
|
||||
bigfloat a;
|
||||
a.x=fabs(x.x);
|
||||
return a;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, long power) {
|
||||
bigfloat b;
|
||||
b.x=pow(a.x,power);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat pow_bf(const bigfloat& a, bigfloat &power) {
|
||||
bigfloat b;
|
||||
b.x=pow(a.x,power.x);
|
||||
return b;
|
||||
}
|
||||
|
||||
friend bigfloat exp_bf(const bigfloat& a) {
|
||||
bigfloat b;
|
||||
b.x=exp(a.x);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* Comparison Functions */
|
||||
friend int operator>(const bigfloat& x, const bigfloat& y) {
|
||||
return x.x>y.x;
|
||||
}
|
||||
|
||||
friend int operator<(const bigfloat& x, const bigfloat& y) {
|
||||
return x.x<y.x;
|
||||
}
|
||||
|
||||
friend int sgn(const bigfloat& x) {
|
||||
if ( x.x>=0 ) return 1;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
/* Miscellaneous Functions */
|
||||
|
||||
// friend bigfloat& random(void);
|
||||
};
|
||||
|
||||
|
397
Grid/algorithms/iterative/AdefGeneric.h
Normal file
397
Grid/algorithms/iterative/AdefGeneric.h
Normal file
@ -0,0 +1,397 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/AdefGeneric.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
|
||||
|
||||
/*
|
||||
* Compared to Tang-2009: P=Pleft. P^T = PRight Q=MssInv.
|
||||
* Script A = SolverMatrix
|
||||
* Script P = Preconditioner
|
||||
*
|
||||
* Deflation methods considered
|
||||
* -- Solve P A x = P b [ like Luscher ]
|
||||
* DEF-1 M P A x = M P b [i.e. left precon]
|
||||
* DEF-2 P^T M A x = P^T M b
|
||||
* ADEF-1 Preconditioner = M P + Q [ Q + M + M A Q]
|
||||
* ADEF-2 Preconditioner = P^T M + Q
|
||||
* BNN Preconditioner = P^T M P + Q
|
||||
* BNN2 Preconditioner = M P + P^TM +Q - M P A M
|
||||
*
|
||||
* Implement ADEF-2
|
||||
*
|
||||
* Vstart = P^Tx + Qb
|
||||
* M1 = P^TM + Q
|
||||
* M2=M3=1
|
||||
* Vout = x
|
||||
*/
|
||||
|
||||
// abstract base
|
||||
template<class Field, class CoarseField>
|
||||
class TwoLevelFlexiblePcg : public LinearFunction<Field>
|
||||
{
|
||||
public:
|
||||
int verbose;
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
const int mmax = 5;
|
||||
GridBase *grid;
|
||||
GridBase *coarsegrid;
|
||||
|
||||
LinearOperatorBase<Field> *_Linop
|
||||
OperatorFunction<Field> *_Smoother,
|
||||
LinearFunction<CoarseField> *_CoarseSolver;
|
||||
|
||||
// Need somthing that knows how to get from Coarse to fine and back again
|
||||
|
||||
// more most opertor functions
|
||||
TwoLevelFlexiblePcg(RealD tol,
|
||||
Integer maxit,
|
||||
LinearOperatorBase<Field> *Linop,
|
||||
LinearOperatorBase<Field> *SmootherLinop,
|
||||
OperatorFunction<Field> *Smoother,
|
||||
OperatorFunction<CoarseField> CoarseLinop
|
||||
) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
_Linop(Linop),
|
||||
_PreconditionerLinop(PrecLinop),
|
||||
_Preconditioner(Preconditioner)
|
||||
{
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
// The Pcg routine is common to all, but the various matrices differ from derived
|
||||
// implementation to derived implmentation
|
||||
void operator() (const Field &src, Field &psi){
|
||||
void operator() (const Field &src, Field &psi){
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
grid = src.Grid();
|
||||
|
||||
RealD f;
|
||||
RealD rtzp,rtz,a,d,b;
|
||||
RealD rptzp;
|
||||
RealD tn;
|
||||
RealD guess = norm2(psi);
|
||||
RealD ssq = norm2(src);
|
||||
RealD rsq = ssq*Tolerance*Tolerance;
|
||||
|
||||
/////////////////////////////
|
||||
// Set up history vectors
|
||||
/////////////////////////////
|
||||
std::vector<Field> p (mmax,grid);
|
||||
std::vector<Field> mmp(mmax,grid);
|
||||
std::vector<RealD> pAp(mmax);
|
||||
|
||||
Field x (grid); x = psi;
|
||||
Field z (grid);
|
||||
Field tmp(grid);
|
||||
Field r (grid);
|
||||
Field mu (grid);
|
||||
|
||||
//////////////////////////
|
||||
// x0 = Vstart -- possibly modify guess
|
||||
//////////////////////////
|
||||
x=src;
|
||||
Vstart(x,src);
|
||||
|
||||
// r0 = b -A x0
|
||||
HermOp(x,mmp); // Shouldn't this be something else?
|
||||
axpy (r, -1.0,mmp[0], src); // Recomputes r=src-Ax0
|
||||
|
||||
//////////////////////////////////
|
||||
// Compute z = M1 x
|
||||
//////////////////////////////////
|
||||
M1(r,z,tmp,mp,SmootherMirs);
|
||||
rtzp =real(innerProduct(r,z));
|
||||
|
||||
///////////////////////////////////////
|
||||
// Solve for Mss mu = P A z and set p = z-mu
|
||||
// Def2: p = 1 - Q Az = Pright z
|
||||
// Other algos M2 is trivial
|
||||
///////////////////////////////////////
|
||||
M2(z,p[0]);
|
||||
|
||||
for (int k=0;k<=MaxIterations;k++){
|
||||
|
||||
int peri_k = k % mmax;
|
||||
int peri_kp = (k+1) % mmax;
|
||||
|
||||
rtz=rtzp;
|
||||
d= M3(p[peri_k],mp,mmp[peri_k],tmp);
|
||||
a = rtz/d;
|
||||
|
||||
// Memorise this
|
||||
pAp[peri_k] = d;
|
||||
|
||||
axpy(x,a,p[peri_k],x);
|
||||
RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
|
||||
|
||||
// Compute z = M x
|
||||
M1(r,z,tmp,mp);
|
||||
|
||||
rtzp =real(innerProduct(r,z));
|
||||
|
||||
M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
|
||||
|
||||
p[peri_kp]=p[peri_k];
|
||||
|
||||
// Standard search direction p -> z + b p ; b =
|
||||
b = (rtzp)/rtz;
|
||||
|
||||
int northog;
|
||||
// northog = (peri_kp==0)?1:peri_kp; // This is the fCG(mmax) algorithm
|
||||
northog = (k>mmax-1)?(mmax-1):k; // This is the fCG-Tr(mmax-1) algorithm
|
||||
|
||||
for(int back=0; back < northog; back++){
|
||||
int peri_back = (k-back)%mmax;
|
||||
RealD pbApk= real(innerProduct(mmp[peri_back],p[peri_kp]));
|
||||
RealD beta = -pbApk/pAp[peri_back];
|
||||
axpy(p[peri_kp],beta,p[peri_back],p[peri_kp]);
|
||||
}
|
||||
|
||||
RealD rrn=sqrt(rn/ssq);
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: k= "<<k<<" residual = "<<rrn<<std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if ( rn <= rsq ) {
|
||||
|
||||
HermOp(x,mmp); // Shouldn't this be something else?
|
||||
axpy(tmp,-1.0,src,mmp[0]);
|
||||
|
||||
RealD psinorm = sqrt(norm2(x));
|
||||
RealD srcnorm = sqrt(norm2(src));
|
||||
RealD tmpnorm = sqrt(norm2(tmp));
|
||||
RealD true_residual = tmpnorm/srcnorm;
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: true residual is "<<true_residual<<std::endl;
|
||||
std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
|
||||
return k;
|
||||
}
|
||||
}
|
||||
// Non-convergence
|
||||
assert(0);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
virtual void M(Field & in,Field & out,Field & tmp) {
|
||||
|
||||
}
|
||||
|
||||
virtual void M1(Field & in, Field & out) {// the smoother
|
||||
|
||||
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
|
||||
Field tmp(grid);
|
||||
Field Min(grid);
|
||||
|
||||
PcgM(in,Min); // Smoother call
|
||||
|
||||
HermOp(Min,out);
|
||||
axpy(tmp,-1.0,out,in); // tmp = in - A Min
|
||||
|
||||
ProjectToSubspace(tmp,PleftProj);
|
||||
ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
|
||||
PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]
|
||||
axpy(out,1.0,Min,tmp); // Min+tmp
|
||||
}
|
||||
|
||||
virtual void M2(const Field & in, Field & out) {
|
||||
out=in;
|
||||
// Must override for Def2 only
|
||||
// case PcgDef2:
|
||||
// Pright(in,out);
|
||||
// break;
|
||||
}
|
||||
|
||||
virtual RealD M3(const Field & p, Field & mmp){
|
||||
double d,dd;
|
||||
HermOpAndNorm(p,mmp,d,dd);
|
||||
return dd;
|
||||
// Must override for Def1 only
|
||||
// case PcgDef1:
|
||||
// d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
|
||||
// linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
|
||||
// Pleft(mp,mmp);
|
||||
// d=real(linop_d->inner(p,mmp));
|
||||
}
|
||||
|
||||
virtual void VstartDef2(Field & xconst Field & src){
|
||||
//case PcgDef2:
|
||||
//case PcgAdef2:
|
||||
//case PcgAdef2f:
|
||||
//case PcgV11f:
|
||||
///////////////////////////////////
|
||||
// Choose x_0 such that
|
||||
// x_0 = guess + (A_ss^inv) r_s = guess + Ass_inv [src -Aguess]
|
||||
// = [1 - Ass_inv A] Guess + Assinv src
|
||||
// = P^T guess + Assinv src
|
||||
// = Vstart [Tang notation]
|
||||
// This gives:
|
||||
// W^T (src - A x_0) = src_s - A guess_s - r_s
|
||||
// = src_s - (A guess)_s - src_s + (A guess)_s
|
||||
// = 0
|
||||
///////////////////////////////////
|
||||
Field r(grid);
|
||||
Field mmp(grid);
|
||||
|
||||
HermOp(x,mmp);
|
||||
axpy (r, -1.0, mmp, src); // r_{-1} = src - A x
|
||||
ProjectToSubspace(r,PleftProj);
|
||||
ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
PromoteFromSubspace(PleftMss_proj,mmp);
|
||||
x=x+mmp;
|
||||
|
||||
}
|
||||
|
||||
virtual void Vstart(Field & x,const Field & src){
|
||||
return;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Only Def1 has non-trivial Vout. Override in Def1
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
virtual void Vout (Field & in, Field & out,Field & src){
|
||||
out = in;
|
||||
//case PcgDef1:
|
||||
// //Qb + PT x
|
||||
// ProjectToSubspace(src,PleftProj);
|
||||
// ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
|
||||
// PromoteFromSubspace(PleftMss_proj,tmp);
|
||||
//
|
||||
// Pright(in,out);
|
||||
//
|
||||
// linop_d->axpy(out,tmp,out,1.0);
|
||||
// break;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Pright and Pleft are common to all implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
virtual void Pright(Field & in,Field & out){
|
||||
// P_R = [ 1 0 ]
|
||||
// [ -Mss^-1 Msb 0 ]
|
||||
Field in_sbar(grid);
|
||||
|
||||
ProjectToSubspace(in,PleftProj);
|
||||
PromoteFromSubspace(PleftProj,out);
|
||||
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
|
||||
|
||||
HermOp(in_sbar,out);
|
||||
ProjectToSubspace(out,PleftProj); // Mssbar in_sbar (project)
|
||||
|
||||
ApplyInverse (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar
|
||||
PromoteFromSubspace(PleftMss_proj,out); //
|
||||
|
||||
axpy(out,-1.0,out,in_sbar); // in_sbar - Mss^{-1} Mssbar in_sbar
|
||||
}
|
||||
virtual void Pleft (Field & in,Field & out){
|
||||
// P_L = [ 1 -Mbs Mss^-1]
|
||||
// [ 0 0 ]
|
||||
Field in_sbar(grid);
|
||||
Field tmp2(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
ProjectToSubspace(in,PleftProj);
|
||||
PromoteFromSubspace(PleftProj,out);
|
||||
axpy(in_sbar,-1.0,out,in); // in_sbar = in - in_s
|
||||
|
||||
ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
|
||||
PromoteFromSubspace(PleftMss_proj,out);
|
||||
|
||||
HermOp(out,Mtmp);
|
||||
|
||||
ProjectToSubspace(Mtmp,PleftProj); // Msbar s Mss^{-1}
|
||||
PromoteFromSubspace(PleftProj,tmp2);
|
||||
|
||||
axpy(out,-1.0,tmp2,Mtmp);
|
||||
axpy(out,-1.0,out,in_sbar); // in_sbar - Msbars Mss^{-1} in_s
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp){
|
||||
|
||||
}
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
|
||||
|
||||
}
|
||||
virtual void M2(Field & in, Field & out){
|
||||
|
||||
}
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
|
||||
|
||||
}
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
|
||||
|
||||
}
|
||||
}
|
||||
/*
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
virtual void Vout (Field & in, Field & out,Field & src,Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
|
||||
public:
|
||||
virtual void M(Field & in,Field & out,Field & tmp);
|
||||
virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
|
||||
virtual void M2(Field & in, Field & out);
|
||||
virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
|
||||
virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
|
||||
}
|
||||
*/
|
||||
#endif
|
694
Grid/algorithms/iterative/BlockConjugateGradient.h
Normal file
694
Grid/algorithms/iterative/BlockConjugateGradient.h
Normal file
@ -0,0 +1,694 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#pragma once
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS, BlockCGVec, BlockCGrQVec };
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient. Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <class Field>
|
||||
class BlockConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
|
||||
typedef typename Field::scalar_type scomplex;
|
||||
|
||||
int blockDim ;
|
||||
int Nblock;
|
||||
|
||||
BlockCGtype CGtype;
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer PrintInterval; //GridLogMessages or Iterative
|
||||
|
||||
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv),PrintInterval(100)
|
||||
{};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Thin QR factorisation (google it)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//Dimensions
|
||||
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
|
||||
//
|
||||
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
|
||||
//
|
||||
// Q C = R => Q = R C^{-1}
|
||||
//
|
||||
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
|
||||
//
|
||||
// Set C = L^{dag}, and then Q^dag Q = ident
|
||||
//
|
||||
// Checks:
|
||||
// Cdag C = Rdag R ; passes.
|
||||
// QdagQ = 1 ; passes
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
Field & Q,
|
||||
const Field & R)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
sliceInnerProductMatrix(m_rr,R,R,Orthog);
|
||||
|
||||
// Force manifest hermitian to avoid rounding related
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Q = R C^{-1}
|
||||
//
|
||||
// Q_j = R_i Cinv(i,j)
|
||||
//
|
||||
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
sliceMulMatrix(Q,Cinv,R,Orthog);
|
||||
}
|
||||
// see comments above
|
||||
void ThinQRfact (Eigen::MatrixXcd &m_rr,
|
||||
Eigen::MatrixXcd &C,
|
||||
Eigen::MatrixXcd &Cinv,
|
||||
std::vector<Field> & Q,
|
||||
const std::vector<Field> & R)
|
||||
{
|
||||
InnerProductMatrix(m_rr,R,R);
|
||||
|
||||
m_rr = 0.5*(m_rr+m_rr.adjoint());
|
||||
|
||||
Eigen::MatrixXcd L = m_rr.llt().matrixL();
|
||||
|
||||
C = L.adjoint();
|
||||
Cinv = C.inverse();
|
||||
|
||||
MulMatrix(Q,Cinv,R);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Call one of several implementations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQ ) {
|
||||
BlockCGrQsolve(Linop,Src,Psi);
|
||||
} else if (CGtype == CGmultiRHS ) {
|
||||
CGmultiRHSsolve(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
virtual void operator()(LinearOperatorBase<Field> &Linop, const std::vector<Field> &Src, std::vector<Field> &Psi)
|
||||
{
|
||||
if ( CGtype == BlockCGrQVec ) {
|
||||
BlockCGrQsolveVec(Linop,Src,Psi);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQ implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim; this is an assumption
|
||||
Nblock = B.Grid()->_fdimensions[Orthog];
|
||||
/* FAKE */
|
||||
Nblock=8;
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
X.checkerboard = B.checkerboard;
|
||||
conformable(X, B);
|
||||
|
||||
Field tmp(B);
|
||||
Field Q(B);
|
||||
Field D(B);
|
||||
Field Z(B);
|
||||
Field AD(B);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,B,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,B,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,X,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
Linop.HermOp(X, AD);
|
||||
tmp = B - AD;
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
D=Q;
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(D, Z);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
|
||||
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
AD = AD-B;
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiRHS conjugate gradient. Dimension zero should be the block direction
|
||||
// Use this for spread out across nodes
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
|
||||
{
|
||||
int Orthog = blockDim; // First dimension is block dim
|
||||
Nblock = Src.Grid()->_fdimensions[Orthog];
|
||||
|
||||
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
|
||||
|
||||
Psi.checkerboard = Src.checkerboard;
|
||||
conformable(Psi, Src);
|
||||
|
||||
Field P(Src);
|
||||
Field AP(Src);
|
||||
Field R(Src);
|
||||
|
||||
std::vector<ComplexD> v_pAp(Nblock);
|
||||
std::vector<RealD> v_rr (Nblock);
|
||||
std::vector<RealD> v_rr_inv(Nblock);
|
||||
std::vector<RealD> v_alpha(Nblock);
|
||||
std::vector<RealD> v_beta(Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
sliceNorm(ssq,Src,Orthog);
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
sliceNorm(residuals,Src,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
sliceNorm(residuals,Psi,Orthog);
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
// Initial search dir is guess
|
||||
Linop.HermOp(Psi, AP);
|
||||
|
||||
R = Src - AP;
|
||||
P = R;
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch sliceNormTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
RealD rrsum=0;
|
||||
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
|
||||
|
||||
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
|
||||
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(P, AP);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
// Alpha
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductVector(v_pAp,P,AP,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
|
||||
}
|
||||
|
||||
// Psi, R update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
|
||||
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
// Beta
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_rr_inv[b] = 1.0/v_rr[b];
|
||||
}
|
||||
sliceNormTimer.Start();
|
||||
sliceNorm(v_rr,R,Orthog);
|
||||
sliceNormTimer.Stop();
|
||||
for(int b=0;b<Nblock;b++){
|
||||
v_beta[b] = v_rr_inv[b] *v_rr[b];
|
||||
}
|
||||
|
||||
// Search update
|
||||
sliceMaddTimer.Start();
|
||||
sliceMaddVector(P,v_beta,P,R,Orthog);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
RealD max_resid=0;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
RealD rr = v_rr[b]/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
Linop.HermOp(Psi, AP);
|
||||
AP = AP-Src;
|
||||
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y){
|
||||
for(int b=0;b<Nblock;b++){
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
m(b,bp) = innerProduct(X[b],Y[bp]);
|
||||
}}
|
||||
}
|
||||
void MaddMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X,const std::vector<Field> &Y,RealD scale=1.0){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
// Deal with case AP aliases with either Y or X
|
||||
std::vector<Field> tmp(Nblock,X[0]);
|
||||
for(int b=0;b<Nblock;b++){
|
||||
tmp[b] = Y[b];
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
tmp[b] = tmp[b] + (scale*m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = tmp[b];
|
||||
}
|
||||
}
|
||||
void MulMatrix(std::vector<Field> &AP, Eigen::MatrixXcd &m , const std::vector<Field> &X){
|
||||
// Should make this cache friendly with site outermost, parallel_for
|
||||
for(int b=0;b<Nblock;b++){
|
||||
AP[b] = Zero();
|
||||
for(int bp=0;bp<Nblock;bp++) {
|
||||
AP[b] += (m(bp,b))*X[bp];
|
||||
}
|
||||
}
|
||||
}
|
||||
double normv(const std::vector<Field> &P){
|
||||
double nn = 0.0;
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
nn+=norm2(P[b]);
|
||||
}
|
||||
return nn;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// BlockCGrQvec implementation:
|
||||
//--------------------------
|
||||
// X is guess/Solution
|
||||
// B is RHS
|
||||
// Solve A X_i = B_i ; i refers to Nblock index
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
void BlockCGrQsolveVec(LinearOperatorBase<Field> &Linop, const std::vector<Field> &B, std::vector<Field> &X)
|
||||
{
|
||||
Nblock = B.size();
|
||||
assert(Nblock == X.size());
|
||||
|
||||
std::cout<<GridLogMessage<<" Block Conjugate Gradient Vec rQ : Nblock "<<Nblock<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
X[b].checkerboard = B[b].checkerboard;
|
||||
conformable(X[b], B[b]);
|
||||
conformable(X[b], X[0]);
|
||||
}
|
||||
|
||||
Field Fake(B[0]);
|
||||
|
||||
std::vector<Field> tmp(Nblock,Fake);
|
||||
std::vector<Field> Q(Nblock,Fake);
|
||||
std::vector<Field> D(Nblock,Fake);
|
||||
std::vector<Field> Z(Nblock,Fake);
|
||||
std::vector<Field> AD(Nblock,Fake);
|
||||
|
||||
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
|
||||
|
||||
// Initial residual computation & set up
|
||||
std::vector<RealD> residuals(Nblock);
|
||||
std::vector<RealD> ssq(Nblock);
|
||||
|
||||
RealD sssum=0;
|
||||
for(int b=0;b<Nblock;b++){ ssq[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(B[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
for(int b=0;b<Nblock;b++){ residuals[b] = norm2(X[b]);}
|
||||
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
|
||||
|
||||
/************************************************************************
|
||||
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
|
||||
************************************************************************
|
||||
* Dimensions:
|
||||
*
|
||||
* X,B==(Nferm x Nblock)
|
||||
* A==(Nferm x Nferm)
|
||||
*
|
||||
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
|
||||
*
|
||||
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
* for k:
|
||||
* Z = AD
|
||||
* M = [D^dag Z]^{-1}
|
||||
* X = X + D MC
|
||||
* QS = Q - ZM
|
||||
* D = Q + D S^dag
|
||||
* C = S C
|
||||
*/
|
||||
///////////////////////////////////////
|
||||
// Initial block: initial search dir is guess
|
||||
///////////////////////////////////////
|
||||
std::cout << GridLogMessage<<"BlockCGrQvec algorithm initialisation " <<std::endl;
|
||||
|
||||
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
Linop.HermOp(X[b], AD[b]);
|
||||
tmp[b] = B[b] - AD[b];
|
||||
}
|
||||
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
|
||||
for(int b=0;b<Nblock;b++) D[b]=Q[b];
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ vec computed initial residual and QR fact " <<std::endl;
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch sliceInnerTimer;
|
||||
GridStopWatch sliceMaddTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
|
||||
//3. Z = AD
|
||||
MatrixTimer.Start();
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(D[b], Z[b]);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
InnerProductMatrix(m_DZ,D,Z);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(X,m_tmp, D,X);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//6. QS = Q - ZM
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(tmp,m_M,Z,Q,-1.0);
|
||||
sliceMaddTimer.Stop();
|
||||
QRTimer.Start();
|
||||
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
|
||||
QRTimer.Stop();
|
||||
|
||||
//7. D = Q + D S^dag
|
||||
m_tmp = m_S.adjoint();
|
||||
sliceMaddTimer.Start();
|
||||
MaddMatrix(D,m_tmp,D,Q);
|
||||
sliceMaddTimer.Stop();
|
||||
|
||||
//8. C = S C
|
||||
m_C = m_S*m_C;
|
||||
|
||||
/*********************
|
||||
* convergence monitor
|
||||
*********************
|
||||
*/
|
||||
m_rr = m_C.adjoint() * m_C;
|
||||
|
||||
RealD max_resid=0;
|
||||
RealD rrsum=0;
|
||||
RealD rr;
|
||||
|
||||
for(int b=0;b<Nblock;b++) {
|
||||
rrsum+=real(m_rr(b,b));
|
||||
rr = real(m_rr(b,b))/ssq[b];
|
||||
if ( rr > max_resid ) max_resid = rr;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << "\t Block Iteration "<<k<<" ave resid "<< sqrt(rrsum/sssum) << " max "<< sqrt(max_resid) <<std::endl;
|
||||
|
||||
if ( max_resid < Tolerance*Tolerance ) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++){
|
||||
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
|
||||
|
||||
for(int b=0;b<Nblock;b++) Linop.HermOp(X[b], AD[b]);
|
||||
for(int b=0;b<Nblock;b++) AD[b] = AD[b]-B[b];
|
||||
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(normv(AD)/normv(B)) <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
180
Grid/algorithms/iterative/ConjugateGradient.h
Normal file
180
Grid/algorithms/iterative/ConjugateGradient.h
Normal file
@ -0,0 +1,180 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_H
|
||||
#define GRID_CONJUGATE_GRADIENT_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Field>
|
||||
class ConjugateGradient : public OperatorFunction<Field> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
|
||||
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
ErrorOnNoConverge(err_on_no_conv){};
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
|
||||
Field p(src);
|
||||
Field mmp(src);
|
||||
Field r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
|
||||
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
a = norm2(p);
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mp " << d << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: mmp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: cp,r " << cp << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(8) << "ConjugateGradient: p " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(8)
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch InnerTimer;
|
||||
GridStopWatch AxpyNormTimer;
|
||||
GridStopWatch LinearCombTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations*1000; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(p, mmp);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
InnerTimer.Start();
|
||||
ComplexD dc = innerProduct(p,mmp);
|
||||
InnerTimer.Stop();
|
||||
d = dc.real();
|
||||
a = c / d;
|
||||
|
||||
AxpyNormTimer.Start();
|
||||
cp = axpy_norm(r, -a, mmp, r);
|
||||
AxpyNormTimer.Stop();
|
||||
b = cp / c;
|
||||
|
||||
LinearCombTimer.Start();
|
||||
auto psi_v = psi.View();
|
||||
auto p_v = p.View();
|
||||
auto r_v = r.View();
|
||||
parallel_for(int ss=0;ss<src.Grid()->oSites();ss++){
|
||||
vstream(psi_v[ss], a * p_v[ss] + psi_v[ss]);
|
||||
vstream(p_v [ss], b * p_v[ss] + r_v[ss]);
|
||||
}
|
||||
LinearCombTimer.Stop();
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||
<< " residual^2 " << sqrt(cp/ssq) << " target " << Tolerance << std::endl;
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
SolverTimer.Stop();
|
||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
RealD srcnorm = std::sqrt(norm2(src));
|
||||
RealD resnorm = std::sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
156
Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
156
Grid/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
@ -0,0 +1,156 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
//Mixed precision restarted defect correction CG
|
||||
template<class FieldD,class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
|
||||
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||
LinearFunction<FieldF> *guesser;
|
||||
|
||||
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
|
||||
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||
Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||
|
||||
void useGuesser(LinearFunction<FieldF> &g){
|
||||
guesser = &g;
|
||||
}
|
||||
|
||||
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||
TotalInnerIterations = 0;
|
||||
|
||||
GridStopWatch TotalTimer;
|
||||
TotalTimer.Start();
|
||||
|
||||
int cb = src_d_in.Checkerboard();
|
||||
sol_d.Checkerboard() = cb;
|
||||
|
||||
RealD src_norm = norm2(src_d_in);
|
||||
RealD stop = src_norm * Tolerance*Tolerance;
|
||||
|
||||
GridBase* DoublePrecGrid = src_d_in.Grid();
|
||||
FieldD tmp_d(DoublePrecGrid);
|
||||
tmp_d.Checkerboard() = cb;
|
||||
|
||||
FieldD tmp2_d(DoublePrecGrid);
|
||||
tmp2_d.Checkerboard() = cb;
|
||||
|
||||
FieldD src_d(DoublePrecGrid);
|
||||
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||
|
||||
RealD inner_tol = InnerTolerance;
|
||||
|
||||
FieldF src_f(SinglePrecGrid);
|
||||
src_f.Checkerboard() = cb;
|
||||
|
||||
FieldF sol_f(SinglePrecGrid);
|
||||
sol_f.Checkerboard() = cb;
|
||||
|
||||
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||
CG_f.ErrorOnNoConverge = false;
|
||||
|
||||
GridStopWatch InnerCGtimer;
|
||||
|
||||
GridStopWatch PrecChangeTimer;
|
||||
|
||||
Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
|
||||
|
||||
for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||
//Compute double precision rsd and also new RHS vector.
|
||||
Linop_d.HermOp(sol_d, tmp_d);
|
||||
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||
|
||||
if(norm < OuterLoopNormMult * stop){
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||
break;
|
||||
}
|
||||
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(src_f, src_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
sol_f = Zero();
|
||||
|
||||
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||
if(guesser != NULL)
|
||||
(*guesser)(src_f, sol_f);
|
||||
|
||||
//Inner CG
|
||||
CG_f.Tolerance = inner_tol;
|
||||
InnerCGtimer.Start();
|
||||
CG_f(Linop_f, src_f, sol_f);
|
||||
InnerCGtimer.Stop();
|
||||
TotalInnerIterations += CG_f.IterationsToComplete;
|
||||
|
||||
//Convert sol back to double and add to double prec solution
|
||||
PrecChangeTimer.Start();
|
||||
precisionChange(tmp_d, sol_f);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||
}
|
||||
|
||||
//Final trial CG
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||
|
||||
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||
CG_d(Linop_d, src_d_in, sol_d);
|
||||
TotalFinalStepIterations = CG_d.IterationsToComplete;
|
||||
|
||||
TotalTimer.Stop();
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
|
||||
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
322
Grid/algorithms/iterative/ConjugateGradientMultiShift.h
Normal file
322
Grid/algorithms/iterative/ConjugateGradientMultiShift.h
Normal file
@ -0,0 +1,322 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShift.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||
#define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateGradientMultiShift : public OperatorMultiFunction<Field>,
|
||||
public OperatorFunction<Field>
|
||||
{
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
int verbose;
|
||||
MultiShiftFunction shifts;
|
||||
|
||||
ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
|
||||
MaxIterations(maxit),
|
||||
shifts(_shifts)
|
||||
{
|
||||
verbose=1;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
int nshift = shifts.order;
|
||||
std::vector<Field> results(nshift,grid);
|
||||
(*this)(Linop,src,results,psi);
|
||||
}
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
|
||||
{
|
||||
int nshift = shifts.order;
|
||||
|
||||
(*this)(Linop,src,results);
|
||||
|
||||
psi = shifts.norm*src;
|
||||
for(int i=0;i<nshift;i++){
|
||||
psi = psi + shifts.residues[i]*results[i];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &psi)
|
||||
{
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Convenience references to the info stored in "MultiShiftFunction"
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
int nshift = shifts.order;
|
||||
|
||||
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
|
||||
std::vector<RealD> &mresidual(shifts.tolerances);
|
||||
std::vector<RealD> alpha(nshift,1.0);
|
||||
std::vector<Field> ps(nshift,grid);// Search directions
|
||||
|
||||
assert(psi.size()==nshift);
|
||||
assert(mass.size()==nshift);
|
||||
assert(mresidual.size()==nshift);
|
||||
|
||||
// dynamic sized arrays on stack; 2d is a pain with vector
|
||||
RealD bs[nshift];
|
||||
RealD rsq[nshift];
|
||||
RealD z[nshift][2];
|
||||
int converged[nshift];
|
||||
|
||||
const int primary =0;
|
||||
|
||||
//Primary shift fields CG iteration
|
||||
RealD a,b,c,d;
|
||||
RealD cp,bp,qq; //prev
|
||||
|
||||
// Matrix mult fields
|
||||
Field r(grid);
|
||||
Field p(grid);
|
||||
Field tmp(grid);
|
||||
Field mmp(grid);
|
||||
|
||||
// Check lightest mass
|
||||
for(int s=0;s<nshift;s++){
|
||||
assert( mass[s]>= mass[primary] );
|
||||
converged[s]=0;
|
||||
}
|
||||
|
||||
// Wire guess to zero
|
||||
// Residuals "r" are src
|
||||
// First search direction "p" is also src
|
||||
cp = norm2(src);
|
||||
for(int s=0;s<nshift;s++){
|
||||
rsq[s] = cp * mresidual[s] * mresidual[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift: shift "<<s
|
||||
<<" target resid "<<rsq[s]<<std::endl;
|
||||
ps[s] = src;
|
||||
}
|
||||
// r and p for primary
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
//MdagM+m[0]
|
||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
// have verified that inner product of
|
||||
// p and mmp is equal to d after this since
|
||||
// the d computation is tricky
|
||||
// qq = real(innerProduct(p,mmp));
|
||||
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
|
||||
|
||||
b = -cp /d;
|
||||
|
||||
// Set up the various shift variables
|
||||
int iz=0;
|
||||
z[0][1-iz] = 1.0;
|
||||
z[0][iz] = 1.0;
|
||||
bs[0] = b;
|
||||
for(int s=1;s<nshift;s++){
|
||||
z[s][1-iz] = 1.0;
|
||||
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
|
||||
bs[s] = b*z[s][iz];
|
||||
}
|
||||
|
||||
// r += b[0] A.p[0]
|
||||
// c= norm(r)
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi[s],0.,-bs[s]*alpha[s],src,src);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// Timers
|
||||
///////////////////////////////////////
|
||||
GridStopWatch AXPYTimer;
|
||||
GridStopWatch ShiftTimer;
|
||||
GridStopWatch QRTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
// Iteration loop
|
||||
int k;
|
||||
|
||||
for (k=1;k<=MaxIterations;k++){
|
||||
|
||||
a = c /cp;
|
||||
AXPYTimer.Start();
|
||||
axpy(p,a,p,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Note to self - direction ps is iterated seperately
|
||||
// for each shift. Does not appear to have any scope
|
||||
// for avoiding linear algebra in "single" case.
|
||||
//
|
||||
// However SAME r is used. Could load "r" and update
|
||||
// ALL ps[s]. 2/3 Bandwidth saving
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
AXPYTimer.Start();
|
||||
for(int s=0;s<nshift;s++){
|
||||
if ( ! converged[s] ) {
|
||||
if (s==0){
|
||||
axpy(ps[s],a,ps[s],r);
|
||||
} else{
|
||||
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
|
||||
axpby(ps[s],z[s][iz],as,r,ps[s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
AXPYTimer.Stop();
|
||||
|
||||
cp=c;
|
||||
MatrixTimer.Start();
|
||||
//Linop.HermOpAndNorm(p,mmp,d,qq); // d is used
|
||||
// The below is faster on KNL
|
||||
Linop.HermOp(p,mmp);
|
||||
d=real(innerProduct(p,mmp));
|
||||
|
||||
MatrixTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
axpy(mmp,mass[0],p,mmp);
|
||||
AXPYTimer.Stop();
|
||||
RealD rn = norm2(p);
|
||||
d += rn*mass[0];
|
||||
|
||||
bp=b;
|
||||
b=-cp/d;
|
||||
|
||||
AXPYTimer.Start();
|
||||
c=axpy_norm(r,b,mmp,r);
|
||||
AXPYTimer.Stop();
|
||||
|
||||
// Toggle the recurrence history
|
||||
bs[0] = b;
|
||||
iz = 1-iz;
|
||||
ShiftTimer.Start();
|
||||
for(int s=1;s<nshift;s++){
|
||||
if((!converged[s])){
|
||||
RealD z0 = z[s][1-iz];
|
||||
RealD z1 = z[s][iz];
|
||||
z[s][iz] = z0*z1*bp
|
||||
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
|
||||
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
|
||||
}
|
||||
}
|
||||
ShiftTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
int ss = s;
|
||||
// Scope for optimisation here in case of "single".
|
||||
// Could load psi[0] and pull all ps[s] in.
|
||||
// if ( single ) ss=primary;
|
||||
// Bandwith saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving
|
||||
// Pipelined CG gain:
|
||||
//
|
||||
// New Kernel: Load r, vector of coeffs, vector of pointers ps
|
||||
// New Kernel: Load psi[0], vector of coeffs, vector of pointers ps
|
||||
// If can predict the coefficient bs then we can fuse these and avoid write reread cyce
|
||||
// on ps[s].
|
||||
// Before: 3 x npole + 3 x npole
|
||||
// After : 2 x npole (ps[s]) => 3x speed up of multishift CG.
|
||||
|
||||
if( (!converged[s]) ) {
|
||||
axpy(psi[ss],-bs[s]*alpha[s],ps[s],psi[ss]);
|
||||
}
|
||||
}
|
||||
|
||||
// Convergence checks
|
||||
int all_converged = 1;
|
||||
for(int s=0;s<nshift;s++){
|
||||
|
||||
if ( (!converged[s]) ){
|
||||
|
||||
RealD css = c * z[s][iz]* z[s][iz];
|
||||
|
||||
if(css<rsq[s]){
|
||||
if ( ! converged[s] )
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
|
||||
converged[s]=1;
|
||||
} else {
|
||||
all_converged=0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( all_converged ){
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
|
||||
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
|
||||
|
||||
// Check answers
|
||||
for(int s=0; s < nshift; s++) {
|
||||
Linop.HermOpAndNorm(psi[s],mmp,d,qq);
|
||||
axpy(tmp,mass[s],psi[s],mmp);
|
||||
axpy(r,-alpha[s],src,tmp);
|
||||
RealD rn = norm2(r);
|
||||
RealD cn = norm2(src);
|
||||
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tAXPY " << AXPYTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMarix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tShift " << ShiftTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// ugly hack
|
||||
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
|
||||
// assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
258
Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h
Normal file
258
Grid/algorithms/iterative/ConjugateGradientReliableUpdate.h
Normal file
@ -0,0 +1,258 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateGradientReliableUpdate.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
#define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class FieldD,class FieldF,
|
||||
typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,
|
||||
typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||
class ConjugateGradientReliableUpdate : public LinearFunction<FieldD> {
|
||||
public:
|
||||
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||
// Defaults true.
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
|
||||
Integer ReliableUpdatesPerformed;
|
||||
|
||||
bool DoFinalCleanup; //Final DP cleanup, defaults to true
|
||||
Integer IterationsToCleanup; //Final DP cleanup step iterations
|
||||
|
||||
LinearOperatorBase<FieldF> &Linop_f;
|
||||
LinearOperatorBase<FieldD> &Linop_d;
|
||||
GridBase* SinglePrecGrid;
|
||||
RealD Delta; //reliable update parameter
|
||||
|
||||
//Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single
|
||||
LinearOperatorBase<FieldF> *Linop_fallback;
|
||||
RealD fallback_transition_tol;
|
||||
|
||||
|
||||
ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d, bool err_on_no_conv = true)
|
||||
: Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Delta(_delta),
|
||||
Linop_f(_Linop_f),
|
||||
Linop_d(_Linop_d),
|
||||
SinglePrecGrid(_sp_grid),
|
||||
ErrorOnNoConverge(err_on_no_conv),
|
||||
DoFinalCleanup(true),
|
||||
Linop_fallback(NULL)
|
||||
{};
|
||||
|
||||
void setFallbackLinop(LinearOperatorBase<FieldF> &_Linop_fallback, const RealD _fallback_transition_tol){
|
||||
Linop_fallback = &_Linop_fallback;
|
||||
fallback_transition_tol = _fallback_transition_tol;
|
||||
}
|
||||
|
||||
void operator()(const FieldD &src, FieldD &psi) {
|
||||
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
||||
bool using_fallback = false;
|
||||
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
conformable(psi, src);
|
||||
|
||||
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||
|
||||
FieldD p(src);
|
||||
FieldD mmp(src);
|
||||
FieldD r(src);
|
||||
|
||||
// Initial residual computation & set up
|
||||
RealD guess = norm2(psi);
|
||||
assert(std::isnan(guess) == 0);
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, b);
|
||||
|
||||
r = src - mmp;
|
||||
p = r;
|
||||
|
||||
a = norm2(p);
|
||||
cp = a;
|
||||
ssq = norm2(src);
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl;
|
||||
std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl;
|
||||
|
||||
RealD rsq = Tolerance * Tolerance * ssq;
|
||||
|
||||
// Check if guess is really REALLY good :)
|
||||
if (cp <= rsq) {
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n";
|
||||
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
//Single prec initialization
|
||||
FieldF r_f(SinglePrecGrid);
|
||||
r_f.Checkerboard() = r.Checkerboard();
|
||||
precisionChange(r_f, r);
|
||||
|
||||
FieldF psi_f(r_f);
|
||||
psi_f = Zero();
|
||||
|
||||
FieldF p_f(r_f);
|
||||
FieldF mmp_f(r_f);
|
||||
|
||||
RealD MaxResidSinceLastRelUp = cp; //initial residual
|
||||
|
||||
std::cout << GridLogIterative << std::setprecision(4)
|
||||
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
|
||||
|
||||
GridStopWatch LinalgTimer;
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
SolverTimer.Start();
|
||||
int k = 0;
|
||||
int l = 0;
|
||||
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
c = cp;
|
||||
|
||||
MatrixTimer.Start();
|
||||
Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq);
|
||||
MatrixTimer.Stop();
|
||||
|
||||
LinalgTimer.Start();
|
||||
|
||||
a = c / d;
|
||||
b_pred = a * (a * qq - d) / c;
|
||||
|
||||
cp = axpy_norm(r_f, -a, mmp_f, r_f);
|
||||
b = cp / c;
|
||||
|
||||
// Fuse these loops ; should be really easy
|
||||
psi_f = a * p_f + psi_f;
|
||||
//p_f = p_f * b + r_f;
|
||||
|
||||
LinalgTimer.Stop();
|
||||
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k
|
||||
<< " residual " << cp << " target " << rsq << std::endl;
|
||||
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
|
||||
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
|
||||
|
||||
if(cp > MaxResidSinceLastRelUp){
|
||||
std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl;
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
}
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
//Although not written in the paper, I assume that I have to add on the final solution
|
||||
precisionChange(mmp, psi_f);
|
||||
psi = psi + mmp;
|
||||
|
||||
|
||||
SolverTimer.Stop();
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
RealD srcnorm = std::sqrt(norm2(src));
|
||||
RealD resnorm = std::sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl;
|
||||
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
|
||||
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "Time breakdown "<<std::endl;
|
||||
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
|
||||
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
|
||||
if(DoFinalCleanup){
|
||||
//Do a final CG to cleanup
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate performing final cleanup.\n";
|
||||
ConjugateGradient<FieldD> CG(Tolerance,MaxIterations);
|
||||
CG.ErrorOnNoConverge = ErrorOnNoConverge;
|
||||
CG(Linop_d,src,psi);
|
||||
IterationsToCleanup = CG.IterationsToComplete;
|
||||
}
|
||||
else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n";
|
||||
return;
|
||||
}
|
||||
else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate "
|
||||
<< cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n";
|
||||
precisionChange(mmp, psi_f);
|
||||
psi = psi + mmp;
|
||||
|
||||
Linop_d.HermOpAndNorm(psi, mmp, d, qq);
|
||||
r = src - mmp;
|
||||
|
||||
psi_f = Zero();
|
||||
precisionChange(r_f, r);
|
||||
cp = norm2(r);
|
||||
MaxResidSinceLastRelUp = cp;
|
||||
|
||||
b = cp/c;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual " << cp << std::endl;
|
||||
|
||||
l = l+1;
|
||||
}
|
||||
|
||||
p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence
|
||||
|
||||
if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl;
|
||||
Linop_f_use = Linop_fallback;
|
||||
using_fallback = true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge"
|
||||
<< std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(0);
|
||||
IterationsToComplete = k;
|
||||
ReliableUpdatesPerformed = l;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
|
||||
#endif
|
111
Grid/algorithms/iterative/ConjugateResidual.h
Normal file
111
Grid/algorithms/iterative/ConjugateResidual.h
Normal file
@ -0,0 +1,111 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_CONJUGATE_RESIDUAL_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class ConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
|
||||
ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
||||
verbose=0;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b; // c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
psi=Zero();
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid);
|
||||
|
||||
r=src;
|
||||
p=src;
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
if (verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=1;k<MaxIterations;k++){
|
||||
|
||||
a = rAr/pAAp;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
|
||||
cp = axpy_norm(r,-a,Ap,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
if(verbose) std::cout<<GridLogMessage<<"ConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r)/ssq;
|
||||
std::cout<<GridLogMessage<<"ConjugateResidual: Converged on iteration " <<k
|
||||
<< " computed residual "<<std::sqrt(cp/ssq)
|
||||
<< " true residual "<<std::sqrt(true_resid)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"ConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
104
Grid/algorithms/iterative/Deflation.h
Normal file
104
Grid/algorithms/iterative/Deflation.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_DEFLATION_H
|
||||
#define GRID_DEFLATION_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class Field>
|
||||
class ZeroGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = Zero(); };
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class SourceGuesser: public LinearFunction<Field> {
|
||||
public:
|
||||
virtual void operator()(const Field &src, Field &guess) { guess = src; };
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Fine grid deflation
|
||||
////////////////////////////////
|
||||
template<class Field>
|
||||
class DeflatedGuesser: public LinearFunction<Field> {
|
||||
private:
|
||||
const std::vector<Field> &evec;
|
||||
const std::vector<RealD> &eval;
|
||||
|
||||
public:
|
||||
|
||||
DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
|
||||
|
||||
virtual void operator()(const Field &src,Field &guess) {
|
||||
guess = Zero();
|
||||
assert(evec.size()==eval.size());
|
||||
auto N = evec.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
const Field& tmp = evec[i];
|
||||
axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
|
||||
}
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
}
|
||||
};
|
||||
|
||||
template<class FineField, class CoarseField>
|
||||
class LocalCoherenceDeflatedGuesser: public LinearFunction<FineField> {
|
||||
private:
|
||||
const std::vector<FineField> &subspace;
|
||||
const std::vector<CoarseField> &evec_coarse;
|
||||
const std::vector<RealD> &eval_coarse;
|
||||
public:
|
||||
|
||||
LocalCoherenceDeflatedGuesser(const std::vector<FineField> &_subspace,
|
||||
const std::vector<CoarseField> &_evec_coarse,
|
||||
const std::vector<RealD> &_eval_coarse)
|
||||
: subspace(_subspace),
|
||||
evec_coarse(_evec_coarse),
|
||||
eval_coarse(_eval_coarse)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const FineField &src,FineField &guess) {
|
||||
int N = (int)evec_coarse.size();
|
||||
CoarseField src_coarse(evec_coarse[0].Grid());
|
||||
CoarseField guess_coarse(evec_coarse[0].Grid()); guess_coarse = Zero();
|
||||
blockProject(src_coarse,src,subspace);
|
||||
for (int i=0;i<N;i++) {
|
||||
const CoarseField & tmp = evec_coarse[i];
|
||||
axpy(guess_coarse,TensorRemove(innerProduct(tmp,src_coarse)) / eval_coarse[i],tmp,guess_coarse);
|
||||
}
|
||||
blockPromote(guess_coarse,guess,subspace);
|
||||
guess.Checkerboard() = src.Checkerboard();
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
#endif
|
863
Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
863
Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h
Normal file
@ -0,0 +1,863 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Chulwoo Jung <chulwoo@bnl.gov>
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_BIRL_H
|
||||
#define GRID_BIRL_H
|
||||
|
||||
#include <string.h> //memset
|
||||
//#include <zlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Move following 100 LOC to lattice/Lattice_basis.h
|
||||
////////////////////////////////////////////////////////
|
||||
template<class Field>
|
||||
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
|
||||
{
|
||||
for(int j=0; j<k; ++j){
|
||||
auto ip = innerProduct(basis[j],w);
|
||||
w = w - ip*basis[j];
|
||||
}
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisRotate(std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef decltype(basis[0].View()) View;
|
||||
auto tmp_v = basis[0].View();
|
||||
std::vector<View> basis_v(basis.size(),tmp_v);
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
for(int k=0;k<basis.size();k++){
|
||||
basis_v[k] = basis[k].View();
|
||||
}
|
||||
|
||||
thread_region
|
||||
{
|
||||
std::vector < vobj , commAllocator<vobj> > B(Nm); // Thread private
|
||||
thread_loop_in_region( (int ss=0;ss < grid->oSites();ss++),{
|
||||
for(int j=j0; j<j1; ++j) B[j]=0.;
|
||||
|
||||
for(int j=j0; j<j1; ++j){
|
||||
for(int k=k0; k<k1; ++k){
|
||||
B[j] +=Qt(j,k) * basis_v[k][ss];
|
||||
}
|
||||
}
|
||||
for(int j=j0; j<j1; ++j){
|
||||
basis_v[j][ss] = B[j];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Extract a single rotated vector
|
||||
template<class Field>
|
||||
void basisRotateJ(Field &result,std::vector<Field> &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm)
|
||||
{
|
||||
typedef typename Field::vector_object vobj;
|
||||
GridBase* grid = basis[0].Grid();
|
||||
|
||||
result.Checkerboard() = basis[0].Checkerboard();
|
||||
auto result_v=result.View();
|
||||
thread_loop( (int ss=0;ss < grid->oSites();ss++),{
|
||||
vobj B = Zero();
|
||||
for(int k=k0; k<k1; ++k){
|
||||
auto basis_k = basis[k].View();
|
||||
B +=Qt(j,k) * basis_k[ss];
|
||||
}
|
||||
result_v[ss] = B;
|
||||
});
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisReorderInPlace(std::vector<Field> &_v,std::vector<RealD>& sort_vals, std::vector<int>& idx)
|
||||
{
|
||||
int vlen = idx.size();
|
||||
|
||||
assert(vlen>=1);
|
||||
assert(vlen<=sort_vals.size());
|
||||
assert(vlen<=_v.size());
|
||||
|
||||
for (size_t i=0;i<vlen;i++) {
|
||||
|
||||
if (idx[i] != i) {
|
||||
|
||||
//////////////////////////////////////
|
||||
// idx[i] is a table of desired sources giving a permutation.
|
||||
// Swap v[i] with v[idx[i]].
|
||||
// Find j>i for which _vnew[j] = _vold[i],
|
||||
// track the move idx[j] => idx[i]
|
||||
// track the move idx[i] => i
|
||||
//////////////////////////////////////
|
||||
size_t j;
|
||||
for (j=i;j<idx.size();j++)
|
||||
if (idx[j]==i)
|
||||
break;
|
||||
|
||||
assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i);
|
||||
|
||||
swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy
|
||||
std::swap(sort_vals[i],sort_vals[idx[i]]);
|
||||
|
||||
idx[j] = idx[i];
|
||||
idx[i] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<int> basisSortGetIndex(std::vector<RealD>& sort_vals)
|
||||
{
|
||||
std::vector<int> idx(sort_vals.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) {
|
||||
return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]);
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
template<class Field>
|
||||
void basisSortInPlace(std::vector<Field> & _v,std::vector<RealD>& sort_vals, bool reverse)
|
||||
{
|
||||
std::vector<int> idx = basisSortGetIndex(sort_vals);
|
||||
if (reverse)
|
||||
std::reverse(idx.begin(), idx.end());
|
||||
|
||||
basisReorderInPlace(_v,sort_vals,idx);
|
||||
}
|
||||
|
||||
// PAB: faster to compute the inner products first then fuse loops.
|
||||
// If performance critical can improve.
|
||||
template<class Field>
|
||||
void basisDeflate(const std::vector<Field> &_v,const std::vector<RealD>& eval,const Field& src_orig,Field& result) {
|
||||
result = Zero();
|
||||
assert(_v.size()==eval.size());
|
||||
int N = (int)_v.size();
|
||||
for (int i=0;i<N;i++) {
|
||||
Field& tmp = _v[i];
|
||||
axpy(result,TensorRemove(innerProduct(tmp,src_orig)) / eval[i],tmp,result);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Implicitly restarted lanczos
|
||||
/////////////////////////////////////////////////////////////
|
||||
template<class Field> class ImplicitlyRestartedLanczosTester
|
||||
{
|
||||
public:
|
||||
virtual int TestConvergence(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
|
||||
virtual int ReconstructEval(int j,RealD resid,Field &evec, RealD &eval,RealD evalMaxApprox)=0;
|
||||
};
|
||||
|
||||
enum IRLdiagonalisation {
|
||||
IRLdiagonaliseWithDSTEGR,
|
||||
IRLdiagonaliseWithQR,
|
||||
IRLdiagonaliseWithEigen
|
||||
};
|
||||
|
||||
template<class Field> class ImplicitlyRestartedLanczosHermOpTester : public ImplicitlyRestartedLanczosTester<Field>
|
||||
{
|
||||
public:
|
||||
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosHermOpTester(LinearFunction<Field> &HermOp) : _HermOp(HermOp) { };
|
||||
int ReconstructEval(int j,RealD resid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
return TestConvergence(j,resid,B,eval,evalMaxApprox);
|
||||
}
|
||||
int TestConvergence(int j,RealD eresid,Field &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
Field v(B);
|
||||
RealD eval_poly = eval;
|
||||
// Apply operator
|
||||
_HermOp(B,v);
|
||||
|
||||
RealD vnum = real(innerProduct(B,v)); // HermOp.
|
||||
RealD vden = norm2(B);
|
||||
RealD vv0 = norm2(v);
|
||||
eval = vnum/vden;
|
||||
v -= eval*B;
|
||||
|
||||
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
|
||||
int conv=0;
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
|
||||
return conv;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Field>
|
||||
class ImplicitlyRestartedLanczos {
|
||||
private:
|
||||
const RealD small = 1.0e-8;
|
||||
int MaxIter;
|
||||
int MinRestart; // Minimum number of restarts; only check for convergence after
|
||||
int Nstop; // Number of evecs checked for convergence
|
||||
int Nk; // Number of converged sought
|
||||
// int Np; // Np -- Number of spare vecs in krylov space // == Nm - Nk
|
||||
int Nm; // Nm -- total number of vectors
|
||||
IRLdiagonalisation diagonalisation;
|
||||
int orth_period;
|
||||
|
||||
RealD OrthoTime;
|
||||
RealD eresid, betastp;
|
||||
////////////////////////////////
|
||||
// Embedded objects
|
||||
////////////////////////////////
|
||||
LinearFunction<Field> &_PolyOp;
|
||||
LinearFunction<Field> &_HermOp;
|
||||
ImplicitlyRestartedLanczosTester<Field> &_Tester;
|
||||
// Default tester provided (we need a ref to something in default case)
|
||||
ImplicitlyRestartedLanczosHermOpTester<Field> SimpleTester;
|
||||
/////////////////////////
|
||||
// Constructor
|
||||
/////////////////////////
|
||||
|
||||
public:
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// PAB:
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// Too many options & knobs.
|
||||
// Eliminate:
|
||||
// orth_period
|
||||
// betastp
|
||||
// MinRestart
|
||||
//
|
||||
// Do we really need orth_period
|
||||
// What is the theoretical basis & guarantees of betastp ?
|
||||
// Nstop=Nk viable?
|
||||
// MinRestart avoidable with new convergence test?
|
||||
// Could cut to PolyOp, HermOp, Tester, Nk, Nm, resid, maxiter (+diagonalisation)
|
||||
// HermOp could be eliminated if we dropped the Power method for max eval.
|
||||
// -- also: The eval, eval2, eval2_copy stuff is still unnecessarily unclear
|
||||
//////////////////////////////////////////////////////////////////
|
||||
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
|
||||
LinearFunction<Field> & HermOp,
|
||||
ImplicitlyRestartedLanczosTester<Field> & Tester,
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _MaxIter, // Max iterations
|
||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||
int _MinRestart=1, int _orth_period = 1,
|
||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
|
||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||
eresid(_eresid), betastp(_betastp),
|
||||
MaxIter(_MaxIter) , MinRestart(_MinRestart),
|
||||
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
|
||||
|
||||
ImplicitlyRestartedLanczos(LinearFunction<Field> & PolyOp,
|
||||
LinearFunction<Field> & HermOp,
|
||||
int _Nstop, // sought vecs
|
||||
int _Nk, // sought vecs
|
||||
int _Nm, // spare vecs
|
||||
RealD _eresid, // resid in lmdue deficit
|
||||
int _MaxIter, // Max iterations
|
||||
RealD _betastp=0.0, // if beta(k) < betastp: converged
|
||||
int _MinRestart=1, int _orth_period = 1,
|
||||
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
|
||||
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(SimpleTester),
|
||||
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
|
||||
eresid(_eresid), betastp(_betastp),
|
||||
MaxIter(_MaxIter) , MinRestart(_MinRestart),
|
||||
orth_period(_orth_period), diagonalisation(_diagonalisation) { };
|
||||
|
||||
////////////////////////////////
|
||||
// Helpers
|
||||
////////////////////////////////
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = std::sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
|
||||
void orthogonalize(Field& w, std::vector<Field>& evec,int k)
|
||||
{
|
||||
OrthoTime-=usecond()/1e6;
|
||||
basisOrthogonalize(evec,w,k);
|
||||
normalise(w);
|
||||
OrthoTime+=usecond()/1e6;
|
||||
}
|
||||
|
||||
/* Rudy Arthur's thesis pp.137
|
||||
------------------------
|
||||
Require: M > K P = M − K †
|
||||
Compute the factorization AVM = VM HM + fM eM
|
||||
repeat
|
||||
Q=I
|
||||
for i = 1,...,P do
|
||||
QiRi =HM −θiI Q = QQi
|
||||
H M = Q †i H M Q i
|
||||
end for
|
||||
βK =HM(K+1,K) σK =Q(M,K)
|
||||
r=vK+1βK +rσK
|
||||
VK =VM(1:M)Q(1:M,1:K)
|
||||
HK =HM(1:K,1:K)
|
||||
→AVK =VKHK +fKe†K † Extend to an M = K + P step factorization AVM = VMHM + fMeM
|
||||
until convergence
|
||||
*/
|
||||
void calc(std::vector<RealD>& eval, std::vector<Field>& evec, const Field& src, int& Nconv, bool reverse=false)
|
||||
{
|
||||
GridBase *grid = src.Grid();
|
||||
assert(grid == evec[0].Grid());
|
||||
|
||||
GridLogIRL.TimingMode(1);
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- seek Nk = " << Nk <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- accept Nstop = " << Nstop <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- total Nm = " << Nm <<" vectors"<< std::endl;
|
||||
std::cout << GridLogIRL <<" -- size of eval = " << eval.size() << std::endl;
|
||||
std::cout << GridLogIRL <<" -- size of evec = " << evec.size() << std::endl;
|
||||
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is DSTEGR "<<std::endl;
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is QR "<<std::endl;
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
|
||||
std::cout << GridLogIRL << "Diagonalisation is Eigen "<<std::endl;
|
||||
}
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
|
||||
assert(Nm <= evec.size() && Nm <= eval.size());
|
||||
|
||||
// quickly get an idea of the largest eigenvalue to more properly normalize the residuum
|
||||
RealD evalMaxApprox = 0.0;
|
||||
{
|
||||
auto src_n = src;
|
||||
auto tmp = src;
|
||||
const int _MAX_ITER_IRL_MEVAPP_ = 50;
|
||||
for (int i=0;i<_MAX_ITER_IRL_MEVAPP_;i++) {
|
||||
normalise(src_n);
|
||||
_HermOp(src_n,tmp);
|
||||
RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
|
||||
RealD vden = norm2(src_n);
|
||||
RealD na = vnum/vden;
|
||||
if (fabs(evalMaxApprox/na - 1.0) < 0.05)
|
||||
i=_MAX_ITER_IRL_MEVAPP_;
|
||||
evalMaxApprox = na;
|
||||
std::cout << GridLogIRL << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
|
||||
src_n = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<RealD> lme(Nm);
|
||||
std::vector<RealD> lme2(Nm);
|
||||
std::vector<RealD> eval2(Nm);
|
||||
std::vector<RealD> eval2_copy(Nm);
|
||||
Eigen::MatrixXd Qt = Eigen::MatrixXd::Zero(Nm,Nm);
|
||||
|
||||
Field f(grid);
|
||||
Field v(grid);
|
||||
int k1 = 1;
|
||||
int k2 = Nk;
|
||||
RealD beta_k;
|
||||
|
||||
Nconv = 0;
|
||||
|
||||
// Set initial vector
|
||||
evec[0] = src;
|
||||
normalise(evec[0]);
|
||||
|
||||
// Initial Nk steps
|
||||
OrthoTime=0.;
|
||||
for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
std::cout<<GridLogIRL <<"Initial "<< Nk <<"steps done "<<std::endl;
|
||||
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// Restarting loop begins
|
||||
//////////////////////////////////
|
||||
int iter;
|
||||
for(iter = 0; iter<MaxIter; ++iter){
|
||||
|
||||
OrthoTime=0.;
|
||||
|
||||
std::cout<< GridLogMessage <<" **********************"<< std::endl;
|
||||
std::cout<< GridLogMessage <<" Restart iteration = "<< iter << std::endl;
|
||||
std::cout<< GridLogMessage <<" **********************"<< std::endl;
|
||||
|
||||
std::cout<<GridLogIRL <<" running "<<Nm-Nk <<" steps: "<<std::endl;
|
||||
for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
|
||||
f *= lme[Nm-1];
|
||||
|
||||
std::cout<<GridLogIRL <<" "<<Nm-Nk <<" steps done "<<std::endl;
|
||||
std::cout<<GridLogIRL <<"Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// getting eigenvalues
|
||||
//////////////////////////////////
|
||||
for(int k=0; k<Nm; ++k){
|
||||
eval2[k] = eval[k+k1-1];
|
||||
lme2[k] = lme[k+k1-1];
|
||||
}
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
|
||||
std::cout<<GridLogIRL <<" diagonalized "<<std::endl;
|
||||
|
||||
//////////////////////////////////
|
||||
// sorting
|
||||
//////////////////////////////////
|
||||
eval2_copy = eval2;
|
||||
std::partial_sort(eval2.begin(),eval2.begin()+Nm,eval2.end(),std::greater<RealD>());
|
||||
std::cout<<GridLogIRL <<" evals sorted "<<std::endl;
|
||||
const int chunk=8;
|
||||
for(int io=0; io<k2;io+=chunk){
|
||||
std::cout<<GridLogIRL << "eval "<< std::setw(3) << io ;
|
||||
for(int ii=0;ii<chunk;ii++){
|
||||
if ( (io+ii)<k2 )
|
||||
std::cout<< " "<< std::setw(12)<< eval2[io+ii];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
//////////////////////////////////
|
||||
// Implicitly shifted QR transformations
|
||||
//////////////////////////////////
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
for(int ip=k2; ip<Nm; ++ip){
|
||||
QR_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
|
||||
}
|
||||
std::cout<<GridLogIRL <<"QR decomposed "<<std::endl;
|
||||
|
||||
assert(k2<Nm); assert(k2<Nm); assert(k1>0);
|
||||
|
||||
basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis
|
||||
std::cout<<GridLogIRL <<"basisRotated by Qt *"<<k1-1<<","<<k2+1<<")"<<std::endl;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Compressed vector f and beta(k2)
|
||||
////////////////////////////////////////////////////
|
||||
f *= Qt(k2-1,Nm-1);
|
||||
f += lme[k2-1] * evec[k2];
|
||||
beta_k = norm2(f);
|
||||
beta_k = std::sqrt(beta_k);
|
||||
std::cout<<GridLogIRL<<" beta(k) = "<<beta_k<<std::endl;
|
||||
|
||||
RealD betar = 1.0/beta_k;
|
||||
evec[k2] = betar * f;
|
||||
lme[k2-1] = beta_k;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// Convergence test
|
||||
////////////////////////////////////////////////////
|
||||
for(int k=0; k<Nm; ++k){
|
||||
eval2[k] = eval[k];
|
||||
lme2[k] = lme[k];
|
||||
}
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
|
||||
std::cout<<GridLogIRL <<" Diagonalized "<<std::endl;
|
||||
|
||||
Nconv = 0;
|
||||
if (iter >= MinRestart) {
|
||||
|
||||
std::cout << GridLogIRL << "Test convergence: rotate subset of vectors to test convergence " << std::endl;
|
||||
|
||||
Field B(grid); B.Checkerboard() = evec[0].Checkerboard();
|
||||
|
||||
// power of two search pattern; not every evalue in eval2 is assessed.
|
||||
int allconv =1;
|
||||
for(int jj = 1; jj<=Nstop; jj*=2){
|
||||
int j = Nstop-jj;
|
||||
RealD e = eval2_copy[j]; // Discard the evalue
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
|
||||
allconv=0;
|
||||
}
|
||||
}
|
||||
// Do evec[0] for good measure
|
||||
{
|
||||
int j=0;
|
||||
RealD e = eval2_copy[0];
|
||||
basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
|
||||
if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
|
||||
}
|
||||
if ( allconv ) Nconv = Nstop;
|
||||
|
||||
// test if we converged, if so, terminate
|
||||
std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
|
||||
// if( Nconv>=Nstop || beta_k < betastp){
|
||||
if( Nconv>=Nstop){
|
||||
goto converged;
|
||||
}
|
||||
|
||||
} else {
|
||||
std::cout << GridLogIRL << "iter < MinRestart: do not yet test for convergence\n";
|
||||
} // end of iter loop
|
||||
}
|
||||
|
||||
std::cout<<GridLogError<<"\n NOT converged.\n";
|
||||
abort();
|
||||
|
||||
converged:
|
||||
{
|
||||
Field B(grid); B.Checkerboard() = evec[0].Checkerboard();
|
||||
basisRotate(evec,Qt,0,Nk,0,Nk,Nm);
|
||||
std::cout << GridLogIRL << " Rotated basis"<<std::endl;
|
||||
Nconv=0;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Full final convergence test; unconditionally applied
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
for(int j = 0; j<=Nk; j++){
|
||||
B=evec[j];
|
||||
if( _Tester.ReconstructEval(j,eresid,B,eval2[j],evalMaxApprox) ) {
|
||||
Nconv++;
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nconv < Nstop )
|
||||
std::cout << GridLogIRL << "Nconv ("<<Nconv<<") < Nstop ("<<Nstop<<")"<<std::endl;
|
||||
|
||||
eval=eval2;
|
||||
|
||||
//Keep only converged
|
||||
eval.resize(Nconv);// Nstop?
|
||||
evec.resize(Nconv,grid);// Nstop?
|
||||
basisSortInPlace(evec,eval,reverse);
|
||||
|
||||
}
|
||||
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL << "ImplicitlyRestartedLanczos CONVERGED ; Summary :\n";
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
std::cout << GridLogIRL << " -- Iterations = "<< iter << "\n";
|
||||
std::cout << GridLogIRL << " -- beta(k) = "<< beta_k << "\n";
|
||||
std::cout << GridLogIRL << " -- Nconv = "<< Nconv << "\n";
|
||||
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
/* Saad PP. 195
|
||||
1. Choose an initial vector v1 of 2-norm unity. Set β1 ≡ 0, v0 ≡ 0
|
||||
2. For k = 1,2,...,m Do:
|
||||
3. wk:=Avk−βkv_{k−1}
|
||||
4. αk:=(wk,vk) //
|
||||
5. wk:=wk−αkvk // wk orthog vk
|
||||
6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
7. vk+1 := wk/βk+1
|
||||
8. EndDo
|
||||
*/
|
||||
void step(std::vector<RealD>& lmd,
|
||||
std::vector<RealD>& lme,
|
||||
std::vector<Field>& evec,
|
||||
Field& w,int Nm,int k)
|
||||
{
|
||||
const RealD tiny = 1.0e-20;
|
||||
assert( k< Nm );
|
||||
|
||||
GridStopWatch gsw_op,gsw_o;
|
||||
|
||||
Field& evec_k = evec[k];
|
||||
|
||||
_PolyOp(evec_k,w); std::cout<<GridLogIRL << "PolyOp" <<std::endl;
|
||||
|
||||
if(k>0) w -= lme[k-1] * evec[k-1];
|
||||
|
||||
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk)
|
||||
RealD alph = real(zalph);
|
||||
|
||||
w = w - alph * evec_k;// 5. wk:=wk−αkvk
|
||||
|
||||
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
|
||||
// 7. vk+1 := wk/βk+1
|
||||
|
||||
lmd[k] = alph;
|
||||
lme[k] = beta;
|
||||
|
||||
if (k>0 && k % orth_period == 0) {
|
||||
orthogonalize(w,evec,k); // orthonormalise
|
||||
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl;
|
||||
}
|
||||
|
||||
if(k < Nm-1) evec[k+1] = w;
|
||||
|
||||
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
|
||||
if ( beta < tiny )
|
||||
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
|
||||
}
|
||||
|
||||
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt, // Nm x Nm
|
||||
GridBase *grid)
|
||||
{
|
||||
Eigen::MatrixXd TriDiag = Eigen::MatrixXd::Zero(Nk,Nk);
|
||||
|
||||
for(int i=0;i<Nk;i++) TriDiag(i,i) = lmd[i];
|
||||
for(int i=0;i<Nk-1;i++) TriDiag(i,i+1) = lme[i];
|
||||
for(int i=0;i<Nk-1;i++) TriDiag(i+1,i) = lme[i];
|
||||
|
||||
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigensolver(TriDiag);
|
||||
|
||||
for (int i = 0; i < Nk; i++) {
|
||||
lmd[Nk-1-i] = eigensolver.eigenvalues()(i);
|
||||
}
|
||||
for (int i = 0; i < Nk; i++) {
|
||||
for (int j = 0; j < Nk; j++) {
|
||||
Qt(Nk-1-i,j) = eigensolver.eigenvectors()(j,i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// File could end here if settle on Eigen ??? !!!
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
void QR_decomp(std::vector<RealD>& lmd, // Nm
|
||||
std::vector<RealD>& lme, // Nm
|
||||
int Nk, int Nm, // Nk, Nm
|
||||
Eigen::MatrixXd& Qt, // Nm x Nm matrix
|
||||
RealD Dsh, int kmin, int kmax)
|
||||
{
|
||||
int k = kmin-1;
|
||||
RealD x;
|
||||
|
||||
RealD Fden = 1.0/hypot(lmd[k]-Dsh,lme[k]);
|
||||
RealD c = ( lmd[k] -Dsh) *Fden;
|
||||
RealD s = -lme[k] *Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
x =-s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt(k,i);
|
||||
RealD Qtmp2 = Qt(k+1,i);
|
||||
Qt(k,i) = c*Qtmp1 - s*Qtmp2;
|
||||
Qt(k+1,i)= s*Qtmp1 + c*Qtmp2;
|
||||
}
|
||||
|
||||
// Givens transformations
|
||||
for(int k = kmin; k < kmax-1; ++k){
|
||||
|
||||
RealD Fden = 1.0/hypot(x,lme[k-1]);
|
||||
RealD c = lme[k-1]*Fden;
|
||||
RealD s = - x*Fden;
|
||||
|
||||
RealD tmpa1 = lmd[k];
|
||||
RealD tmpa2 = lmd[k+1];
|
||||
RealD tmpb = lme[k];
|
||||
|
||||
lmd[k] = c*c*tmpa1 +s*s*tmpa2 -2.0*c*s*tmpb;
|
||||
lmd[k+1] = s*s*tmpa1 +c*c*tmpa2 +2.0*c*s*tmpb;
|
||||
lme[k] = c*s*(tmpa1-tmpa2) +(c*c-s*s)*tmpb;
|
||||
lme[k-1] = c*lme[k-1] -s*x;
|
||||
|
||||
if(k != kmax-2){
|
||||
x = -s*lme[k+1];
|
||||
lme[k+1] = c*lme[k+1];
|
||||
}
|
||||
|
||||
for(int i=0; i<Nk; ++i){
|
||||
RealD Qtmp1 = Qt(k,i);
|
||||
RealD Qtmp2 = Qt(k+1,i);
|
||||
Qt(k,i) = c*Qtmp1 -s*Qtmp2;
|
||||
Qt(k+1,i) = s*Qtmp1 +c*Qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void diagonalize(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
Qt = Eigen::MatrixXd::Identity(Nm,Nm);
|
||||
if ( diagonalisation == IRLdiagonaliseWithDSTEGR ) {
|
||||
diagonalize_lapack(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithQR ) {
|
||||
diagonalize_QR(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else if ( diagonalisation == IRLdiagonaliseWithEigen ) {
|
||||
diagonalize_Eigen(lmd,lme,Nk,Nm,Qt,grid);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_LAPACK
|
||||
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||
double *work, int *lwork, int *iwork, int *liwork,
|
||||
int *info);
|
||||
#endif
|
||||
|
||||
void diagonalize_lapack(std::vector<RealD>& lmd,
|
||||
std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd& Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
#ifdef USE_LAPACK
|
||||
const int size = Nm;
|
||||
int NN = Nk;
|
||||
double evals_tmp[NN];
|
||||
double evec_tmp[NN][NN];
|
||||
memset(evec_tmp[0],0,sizeof(double)*NN*NN);
|
||||
double DD[NN];
|
||||
double EE[NN];
|
||||
for (int i = 0; i< NN; i++) {
|
||||
for (int j = i - 1; j <= i + 1; j++) {
|
||||
if ( j < NN && j >= 0 ) {
|
||||
if (i==j) DD[i] = lmd[i];
|
||||
if (i==j) evals_tmp[i] = lmd[i];
|
||||
if (j==(i-1)) EE[j] = lme[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
int evals_found;
|
||||
int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
|
||||
int liwork = 3+NN*10 ;
|
||||
int iwork[liwork];
|
||||
double work[lwork];
|
||||
int isuppz[2*NN];
|
||||
char jobz = 'V'; // calculate evals & evecs
|
||||
char range = 'I'; // calculate all evals
|
||||
// char range = 'A'; // calculate all evals
|
||||
char uplo = 'U'; // refer to upper half of original matrix
|
||||
char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
|
||||
int ifail[NN];
|
||||
int info;
|
||||
int total = grid->_Nprocessors;
|
||||
int node = grid->_processor;
|
||||
int interval = (NN/total)+1;
|
||||
double vl = 0.0, vu = 0.0;
|
||||
int il = interval*node+1 , iu = interval*(node+1);
|
||||
if (iu > NN) iu=NN;
|
||||
double tol = 0.0;
|
||||
if (1) {
|
||||
memset(evals_tmp,0,sizeof(double)*NN);
|
||||
if ( il <= NN){
|
||||
LAPACK_dstegr(&jobz, &range, &NN,
|
||||
(double*)DD, (double*)EE,
|
||||
&vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
|
||||
&tol, // tolerance
|
||||
&evals_found, evals_tmp, (double*)evec_tmp, &NN,
|
||||
isuppz,
|
||||
work, &lwork, iwork, &liwork,
|
||||
&info);
|
||||
for (int i = iu-1; i>= il-1; i--){
|
||||
evals_tmp[i] = evals_tmp[i - (il-1)];
|
||||
if (il>1) evals_tmp[i-(il-1)]=0.;
|
||||
for (int j = 0; j< NN; j++){
|
||||
evec_tmp[i][j] = evec_tmp[i - (il-1)][j];
|
||||
if (il>1) evec_tmp[i-(il-1)][j]=0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
grid->GlobalSumVector(evals_tmp,NN);
|
||||
grid->GlobalSumVector((double*)evec_tmp,NN*NN);
|
||||
}
|
||||
}
|
||||
// Safer to sort instead of just reversing it,
|
||||
// but the document of the routine says evals are sorted in increasing order.
|
||||
// qr gives evals in decreasing order.
|
||||
for(int i=0;i<NN;i++){
|
||||
lmd [NN-1-i]=evals_tmp[i];
|
||||
for(int j=0;j<NN;j++){
|
||||
Qt((NN-1-i),j)=evec_tmp[i][j];
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void diagonalize_QR(std::vector<RealD>& lmd, std::vector<RealD>& lme,
|
||||
int Nk, int Nm,
|
||||
Eigen::MatrixXd & Qt,
|
||||
GridBase *grid)
|
||||
{
|
||||
int QRiter = 100*Nm;
|
||||
int kmin = 1;
|
||||
int kmax = Nk;
|
||||
|
||||
// (this should be more sophisticated)
|
||||
for(int iter=0; iter<QRiter; ++iter){
|
||||
|
||||
// determination of 2x2 leading submatrix
|
||||
RealD dsub = lmd[kmax-1]-lmd[kmax-2];
|
||||
RealD dd = std::sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]);
|
||||
RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub)));
|
||||
// (Dsh: shift)
|
||||
|
||||
// transformation
|
||||
QR_decomp(lmd,lme,Nk,Nm,Qt,Dsh,kmin,kmax); // Nk, Nm
|
||||
|
||||
// Convergence criterion (redef of kmin and kamx)
|
||||
for(int j=kmax-1; j>= kmin; --j){
|
||||
RealD dds = fabs(lmd[j-1])+fabs(lmd[j]);
|
||||
if(fabs(lme[j-1])+dds > dds){
|
||||
kmax = j+1;
|
||||
goto continued;
|
||||
}
|
||||
}
|
||||
QRiter = iter;
|
||||
return;
|
||||
|
||||
continued:
|
||||
for(int j=0; j<kmax-1; ++j){
|
||||
RealD dds = fabs(lmd[j])+fabs(lmd[j+1]);
|
||||
if(fabs(lme[j])+dds > dds){
|
||||
kmin = j+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << GridLogError << "[QL method] Error - Too many iteration: "<<QRiter<<"\n";
|
||||
abort();
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
405
Grid/algorithms/iterative/LocalCoherenceLanczos.h
Normal file
405
Grid/algorithms/iterative/LocalCoherenceLanczos.h
Normal file
@ -0,0 +1,405 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/LocalCoherenceLanczos.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christoph Lehner <clehner@bnl.gov>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_LOCAL_COHERENCE_IRL_H
|
||||
#define GRID_LOCAL_COHERENCE_IRL_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
struct LanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams,
|
||||
ChebyParams, Cheby,/*Chebyshev*/
|
||||
int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/
|
||||
int, Nk, /*Vecs in Lanczos seek converge*/
|
||||
int, Nm, /*Total vecs in Lanczos include restart*/
|
||||
RealD, resid, /*residual*/
|
||||
int, MaxIt,
|
||||
RealD, betastp, /* ? */
|
||||
int, MinRes); // Must restart
|
||||
};
|
||||
|
||||
struct LocalCoherenceLanczosParams : Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
|
||||
bool, saveEvecs,
|
||||
bool, doFine,
|
||||
bool, doFineRead,
|
||||
bool, doCoarse,
|
||||
bool, doCoarseRead,
|
||||
LanczosParams, FineParams,
|
||||
LanczosParams, CoarseParams,
|
||||
ChebyParams, Smoother,
|
||||
RealD , coarse_relax_tol,
|
||||
std::vector<int>, blockSize,
|
||||
std::string, config,
|
||||
std::vector < ComplexD >, omega,
|
||||
RealD, mass,
|
||||
RealD, M5);
|
||||
};
|
||||
|
||||
// Duplicate functionality; ProjectedFunctionHermOp could be used with the trivial function
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedHermOp(LinearOperatorBase<FineField>& linop, std::vector<FineField> & _subspace) :
|
||||
_Linop(linop), subspace(_subspace)
|
||||
{
|
||||
assert(subspace.size() >0);
|
||||
};
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
GridBase *FineGrid = subspace[0].Grid();
|
||||
int checkerboard = subspace[0].Checkerboard();
|
||||
|
||||
FineField fin (FineGrid); fin.Checkerboard()= checkerboard;
|
||||
FineField fout(FineGrid); fout.Checkerboard() = checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Promote to fine"<<std::endl;
|
||||
_Linop.HermOp(fin,fout); std::cout<<GridLogIRL<<"ProjectedHermop : HermOp (fine) "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
|
||||
OperatorFunction<FineField> & _poly;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
std::vector<FineField> &subspace;
|
||||
|
||||
ProjectedFunctionHermOp(OperatorFunction<FineField> & poly,
|
||||
LinearOperatorBase<FineField>& linop,
|
||||
std::vector<FineField> & _subspace) :
|
||||
_poly(poly),
|
||||
_Linop(linop),
|
||||
subspace(_subspace)
|
||||
{ };
|
||||
|
||||
void operator()(const CoarseField& in, CoarseField& out) {
|
||||
|
||||
GridBase *FineGrid = subspace[0].Grid();
|
||||
int checkerboard = subspace[0].Checkerboard();
|
||||
|
||||
FineField fin (FineGrid); fin.Checkerboard() =checkerboard;
|
||||
FineField fout(FineGrid);fout.Checkerboard() =checkerboard;
|
||||
|
||||
blockPromote(in,fin,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Promote to fine"<<std::endl;
|
||||
_poly(_Linop,fin,fout); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Poly "<<std::endl;
|
||||
blockProject(out,fout,subspace); std::cout<<GridLogIRL<<"ProjectedFunctionHermop : Project to coarse "<<std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester<Lattice<iVector<CComplex,nbasis > > >
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
LinearFunction<CoarseField> & _Poly;
|
||||
OperatorFunction<FineField> & _smoother;
|
||||
LinearOperatorBase<FineField> &_Linop;
|
||||
RealD _coarse_relax_tol;
|
||||
std::vector<FineField> &_subspace;
|
||||
|
||||
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
|
||||
OperatorFunction<FineField> &smoother,
|
||||
LinearOperatorBase<FineField> &Linop,
|
||||
std::vector<FineField> &subspace,
|
||||
RealD coarse_relax_tol=5.0e3)
|
||||
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
|
||||
_coarse_relax_tol(coarse_relax_tol)
|
||||
{ };
|
||||
|
||||
int TestConvergence(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
CoarseField v(B);
|
||||
RealD eval_poly = eval;
|
||||
|
||||
// Apply operator
|
||||
_Poly(B,v);
|
||||
|
||||
RealD vnum = real(innerProduct(B,v)); // HermOp.
|
||||
RealD vden = norm2(B);
|
||||
RealD vv0 = norm2(v);
|
||||
eval = vnum/vden;
|
||||
v -= eval*B;
|
||||
|
||||
RealD vv = norm2(v) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
|
||||
int conv=0;
|
||||
if( (vv<eresid*eresid) ) conv = 1;
|
||||
return conv;
|
||||
}
|
||||
int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox)
|
||||
{
|
||||
GridBase *FineGrid = _subspace[0].Grid();
|
||||
int checkerboard = _subspace[0].Checkerboard();
|
||||
FineField fB(FineGrid);fB.Checkerboard() =checkerboard;
|
||||
FineField fv(FineGrid);fv.Checkerboard() =checkerboard;
|
||||
|
||||
blockPromote(B,fv,_subspace);
|
||||
|
||||
_smoother(_Linop,fv,fB);
|
||||
|
||||
RealD eval_poly = eval;
|
||||
_Linop.HermOp(fB,fv);
|
||||
|
||||
RealD vnum = real(innerProduct(fB,fv)); // HermOp.
|
||||
RealD vden = norm2(fB);
|
||||
RealD vv0 = norm2(fv);
|
||||
eval = vnum/vden;
|
||||
fv -= eval*fB;
|
||||
RealD vv = norm2(fv) / ::pow(evalMaxApprox,2.0);
|
||||
|
||||
std::cout.precision(13);
|
||||
std::cout<<GridLogIRL << "[" << std::setw(3)<<j<<"] "
|
||||
<<"eval = "<<std::setw(25)<< eval << " (" << eval_poly << ")"
|
||||
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
|
||||
<<std::endl;
|
||||
if ( j > nbasis ) eresid = eresid*_coarse_relax_tol;
|
||||
if( (vv<eresid*eresid) ) return 1;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Make serializable Lanczos params
|
||||
////////////////////////////////////////////
|
||||
template<class Fobj,class CComplex,int nbasis>
|
||||
class LocalCoherenceLanczos
|
||||
{
|
||||
public:
|
||||
typedef iVector<CComplex,nbasis > CoarseSiteVector;
|
||||
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
|
||||
typedef Lattice<CoarseSiteVector> CoarseField;
|
||||
typedef Lattice<Fobj> FineField;
|
||||
|
||||
protected:
|
||||
GridBase *_CoarseGrid;
|
||||
GridBase *_FineGrid;
|
||||
int _checkerboard;
|
||||
LinearOperatorBase<FineField> & _FineOp;
|
||||
|
||||
std::vector<RealD> &evals_fine;
|
||||
std::vector<RealD> &evals_coarse;
|
||||
std::vector<FineField> &subspace;
|
||||
std::vector<CoarseField> &evec_coarse;
|
||||
|
||||
private:
|
||||
std::vector<RealD> _evals_fine;
|
||||
std::vector<RealD> _evals_coarse;
|
||||
std::vector<FineField> _subspace;
|
||||
std::vector<CoarseField> _evec_coarse;
|
||||
|
||||
public:
|
||||
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (_evals_fine),
|
||||
evals_coarse(_evals_coarse),
|
||||
subspace (_subspace),
|
||||
evec_coarse(_evec_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Alternate constructore, external storage for use by Hadrons module
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
LocalCoherenceLanczos(GridBase *FineGrid,
|
||||
GridBase *CoarseGrid,
|
||||
LinearOperatorBase<FineField> &FineOp,
|
||||
int checkerboard,
|
||||
std::vector<FineField> &ext_subspace,
|
||||
std::vector<CoarseField> &ext_coarse,
|
||||
std::vector<RealD> &ext_eval_fine,
|
||||
std::vector<RealD> &ext_eval_coarse
|
||||
) :
|
||||
_CoarseGrid(CoarseGrid),
|
||||
_FineGrid(FineGrid),
|
||||
_FineOp(FineOp),
|
||||
_checkerboard(checkerboard),
|
||||
evals_fine (ext_eval_fine),
|
||||
evals_coarse(ext_eval_coarse),
|
||||
subspace (ext_subspace),
|
||||
evec_coarse (ext_coarse)
|
||||
{
|
||||
evals_fine.resize(0);
|
||||
evals_coarse.resize(0);
|
||||
};
|
||||
|
||||
void Orthogonalise(void ) {
|
||||
CoarseScalar InnerProd(_CoarseGrid);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
|
||||
blockOrthogonalise(InnerProd,subspace);
|
||||
};
|
||||
|
||||
template<typename T> static RealD normalise(T& v)
|
||||
{
|
||||
RealD nn = norm2(v);
|
||||
nn = ::sqrt(nn);
|
||||
v = v * (1.0/nn);
|
||||
return nn;
|
||||
}
|
||||
/*
|
||||
void fakeFine(void)
|
||||
{
|
||||
int Nk = nbasis;
|
||||
subspace.resize(Nk,_FineGrid);
|
||||
subspace[0]=1.0;
|
||||
subspace[0].Checkerboard()=_checkerboard;
|
||||
normalise(subspace[0]);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
for(int k=1;k<Nk;k++){
|
||||
subspace[k].Checkerboard()=_checkerboard;
|
||||
Op(subspace[k-1],subspace[k]);
|
||||
normalise(subspace[k]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void testFine(RealD resid)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
ImplicitlyRestartedLanczosHermOpTester<FineField> SimpleTester(Op);
|
||||
for(int k=0;k<nbasis;k++){
|
||||
assert(SimpleTester.ReconstructEval(k,resid,subspace[k],evals_fine[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
|
||||
void testCoarse(RealD resid,ChebyParams cheby_smooth,RealD relax)
|
||||
{
|
||||
assert(evals_fine.size() == nbasis);
|
||||
assert(subspace.size() == nbasis);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (ChebySmooth,_FineOp,subspace);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
for(int k=0;k<evec_coarse.size();k++){
|
||||
if ( k < nbasis ) {
|
||||
assert(ChebySmoothTester.ReconstructEval(k,resid,evec_coarse[k],evals_coarse[k],1.0)==1);
|
||||
} else {
|
||||
assert(ChebySmoothTester.ReconstructEval(k,resid*relax,evec_coarse[k],evals_coarse[k],1.0)==1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void calcFine(ChebyParams cheby_parms,int Nstop,int Nk,int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
assert(nbasis<=Nm);
|
||||
Chebyshev<FineField> Cheby(cheby_parms);
|
||||
FunctionHermOp<FineField> ChebyOp(Cheby,_FineOp);
|
||||
PlainHermOp<FineField> Op(_FineOp);
|
||||
|
||||
evals_fine.resize(Nm);
|
||||
subspace.resize(Nm,_FineGrid);
|
||||
|
||||
ImplicitlyRestartedLanczos<FineField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
|
||||
FineField src(_FineGrid); src=1.0; src.Checkerboard() = _checkerboard;
|
||||
|
||||
int Nconv;
|
||||
IRL.calc(evals_fine,subspace,src,Nconv,false);
|
||||
|
||||
// Shrink down to number saved
|
||||
assert(Nstop>=nbasis);
|
||||
assert(Nconv>=nbasis);
|
||||
evals_fine.resize(nbasis);
|
||||
subspace.resize(nbasis,_FineGrid);
|
||||
}
|
||||
void calcCoarse(ChebyParams cheby_op,ChebyParams cheby_smooth,RealD relax,
|
||||
int Nstop, int Nk, int Nm,RealD resid,
|
||||
RealD MaxIt, RealD betastp, int MinRes)
|
||||
{
|
||||
Chebyshev<FineField> Cheby(cheby_op);
|
||||
ProjectedHermOp<Fobj,CComplex,nbasis> Op(_FineOp,subspace);
|
||||
ProjectedFunctionHermOp<Fobj,CComplex,nbasis> ChebyOp (Cheby,_FineOp,subspace);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// create a smoother and see if we can get a cheap convergence test and smooth inside the IRL
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Chebyshev<FineField> ChebySmooth(cheby_smooth);
|
||||
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
|
||||
|
||||
evals_coarse.resize(Nm);
|
||||
evec_coarse.resize(Nm,_CoarseGrid);
|
||||
|
||||
CoarseField src(_CoarseGrid); src=1.0;
|
||||
|
||||
ImplicitlyRestartedLanczos<CoarseField> IRL(ChebyOp,ChebyOp,ChebySmoothTester,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes);
|
||||
int Nconv=0;
|
||||
IRL.calc(evals_coarse,evec_coarse,src,Nconv,false);
|
||||
assert(Nconv>=Nstop);
|
||||
evals_coarse.resize(Nstop);
|
||||
evec_coarse.resize (Nstop,_CoarseGrid);
|
||||
for (int i=0;i<Nstop;i++){
|
||||
std::cout << i << " Coarse eval = " << evals_coarse[i] << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
60
Grid/algorithms/iterative/NormalEquations.h
Normal file
60
Grid/algorithms/iterative/NormalEquations.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/NormalEquations.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_NORMAL_EQUATIONS_H
|
||||
#define GRID_NORMAL_EQUATIONS_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form an NE solver calling a Herm solver
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class NormalEquations : public OperatorFunction<Field>{
|
||||
private:
|
||||
SparseMatrixBase<Field> & _Matrix;
|
||||
OperatorFunction<Field> & _HermitianSolver;
|
||||
|
||||
public:
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations trick
|
||||
/////////////////////////////////////////////////////
|
||||
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver)
|
||||
: _Matrix(Matrix), _HermitianSolver(HermitianSolver) {};
|
||||
|
||||
void operator() (const Field &in, Field &out){
|
||||
|
||||
Field src(in.Grid());
|
||||
|
||||
_Matrix.Mdag(in,src);
|
||||
_HermitianSolver(src,out); // Mdag M out = Mdag in
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
119
Grid/algorithms/iterative/PrecConjugateResidual.h
Normal file
119
Grid/algorithms/iterative/PrecConjugateResidual.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||
#define GRID_PREC_CONJUGATE_RESIDUAL_H
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Base classes for iterative processes based on operators
|
||||
// single input vec, single output vec.
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
template<class Field>
|
||||
class PrecConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec)
|
||||
{
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
RealD a, b, c, d;
|
||||
RealD cp, ssq,rsq;
|
||||
|
||||
RealD rAr, rAAr, rArp;
|
||||
RealD pAp, pAAp;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid);
|
||||
|
||||
psi=zero;
|
||||
r = src;
|
||||
Preconditioner(r,p);
|
||||
|
||||
|
||||
|
||||
Linop.HermOpAndNorm(p,Ap,pAp,pAAp);
|
||||
Ar=Ap;
|
||||
rAr=pAp;
|
||||
rAAr=pAAp;
|
||||
|
||||
cp =norm2(r);
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
if (verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<0<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
|
||||
Preconditioner(Ap,z);
|
||||
RealD rq= real(innerProduct(Ap,z));
|
||||
|
||||
a = rAr/rq;
|
||||
|
||||
axpy(psi,a,p,psi);
|
||||
cp = axpy_norm(r,-a,z,r);
|
||||
|
||||
rArp=rAr;
|
||||
|
||||
Linop.HermOpAndNorm(r,Ar,rAr,rAAr);
|
||||
|
||||
b =rAr/rArp;
|
||||
|
||||
axpy(p,b,p,r);
|
||||
pAAp=axpy_norm(Ap,b,Ap,Ar);
|
||||
|
||||
if(verbose) std::cout<<GridLogMessage<<"PrecConjugateResidual: iteration " <<k<<" residual "<<cp<< " target"<< rsq<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
Linop.HermOp(psi,Ap);
|
||||
axpy(r,-1.0,src,Ap);
|
||||
RealD true_resid = norm2(r)/ssq;
|
||||
std::cout<<GridLogMessage<<"PrecConjugateResidual: Converged on iteration " <<k
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual "<<sqrt(true_resid)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<"PrecConjugateResidual did NOT converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
230
Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
Normal file
230
Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h
Normal file
@ -0,0 +1,230 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/PrecGeneralisedConjugateResidual.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PREC_GCR_H
|
||||
#define GRID_PREC_GCR_H
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//VPGCR Abe and Zhang, 2005.
|
||||
//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING
|
||||
//Computing and Information Volume 2, Number 2, Pages 147-161
|
||||
//NB. Likely not original reference since they are focussing on a preconditioner variant.
|
||||
// but VPGCR was nicely written up in their paper
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Field>
|
||||
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> {
|
||||
public:
|
||||
RealD Tolerance;
|
||||
Integer MaxIterations;
|
||||
int verbose;
|
||||
int mmax;
|
||||
int nstep;
|
||||
int steps;
|
||||
GridStopWatch PrecTimer;
|
||||
GridStopWatch MatTimer;
|
||||
GridStopWatch LinalgTimer;
|
||||
|
||||
LinearFunction<Field> &Preconditioner;
|
||||
|
||||
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
|
||||
Tolerance(tol),
|
||||
MaxIterations(maxit),
|
||||
Preconditioner(Prec),
|
||||
mmax(_mmax),
|
||||
nstep(_nstep)
|
||||
{
|
||||
verbose=1;
|
||||
};
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
||||
|
||||
psi=Zero();
|
||||
RealD cp, ssq,rsq;
|
||||
ssq=norm2(src);
|
||||
rsq=Tolerance*Tolerance*ssq;
|
||||
|
||||
Field r(src.Grid());
|
||||
|
||||
PrecTimer.Reset();
|
||||
MatTimer.Reset();
|
||||
LinalgTimer.Reset();
|
||||
|
||||
GridStopWatch SolverTimer;
|
||||
SolverTimer.Start();
|
||||
|
||||
steps=0;
|
||||
for(int k=0;k<MaxIterations;k++){
|
||||
|
||||
cp=GCRnStep(Linop,src,psi,rsq);
|
||||
|
||||
std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<std::endl;
|
||||
|
||||
if(cp<rsq) {
|
||||
|
||||
SolverTimer.Stop();
|
||||
|
||||
Linop.HermOp(psi,r);
|
||||
axpy(r,-1.0,src,r);
|
||||
RealD tr = norm2(r);
|
||||
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps
|
||||
<< " computed residual "<<sqrt(cp/ssq)
|
||||
<< " true residual " <<sqrt(tr/ssq)
|
||||
<< " target " <<Tolerance <<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
|
||||
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){
|
||||
|
||||
RealD cp;
|
||||
RealD a, b;
|
||||
RealD zAz, zAAz;
|
||||
RealD rq;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
Field r(grid);
|
||||
Field z(grid);
|
||||
Field tmp(grid);
|
||||
Field ttmp(grid);
|
||||
Field Az(grid);
|
||||
|
||||
////////////////////////////////
|
||||
// history for flexible orthog
|
||||
////////////////////////////////
|
||||
std::vector<Field> q(mmax,grid);
|
||||
std::vector<Field> p(mmax,grid);
|
||||
std::vector<RealD> qq(mmax);
|
||||
|
||||
//////////////////////////////////
|
||||
// initial guess x0 is taken as nonzero.
|
||||
// r0=src-A x0 = src
|
||||
//////////////////////////////////
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(psi,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
r=src-Az;
|
||||
|
||||
/////////////////////
|
||||
// p = Prec(r)
|
||||
/////////////////////
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOp(z,tmp);
|
||||
MatTimer.Stop();
|
||||
|
||||
ttmp=tmp;
|
||||
tmp=tmp-r;
|
||||
|
||||
/*
|
||||
std::cout<<GridLogMessage<<r<<std::endl;
|
||||
std::cout<<GridLogMessage<<z<<std::endl;
|
||||
std::cout<<GridLogMessage<<ttmp<<std::endl;
|
||||
std::cout<<GridLogMessage<<tmp<<std::endl;
|
||||
*/
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||
MatTimer.Stop();
|
||||
|
||||
//p[0],q[0],qq[0]
|
||||
p[0]= z;
|
||||
q[0]= Az;
|
||||
qq[0]= zAAz;
|
||||
|
||||
cp =norm2(r);
|
||||
|
||||
for(int k=0;k<nstep;k++){
|
||||
|
||||
steps++;
|
||||
|
||||
int kp = k+1;
|
||||
int peri_k = k %mmax;
|
||||
int peri_kp= kp%mmax;
|
||||
|
||||
rq= real(innerProduct(r,q[peri_k])); // what if rAr not real?
|
||||
a = rq/qq[peri_k];
|
||||
|
||||
axpy(psi,a,p[peri_k],psi);
|
||||
|
||||
cp = axpy_norm(r,-a,q[peri_k],r);
|
||||
|
||||
if((k==nstep-1)||(cp<rsq)){
|
||||
return cp;
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"] resid " <<sqrt(cp/rsq)<<std::endl;
|
||||
|
||||
PrecTimer.Start();
|
||||
Preconditioner(r,z);// solve Az = r
|
||||
PrecTimer.Stop();
|
||||
|
||||
MatTimer.Start();
|
||||
Linop.HermOpAndNorm(z,Az,zAz,zAAz);
|
||||
Linop.HermOp(z,tmp);
|
||||
MatTimer.Stop();
|
||||
tmp=tmp-r;
|
||||
std::cout<<GridLogMessage<< " Preconditioner resid " <<sqrt(norm2(tmp)/norm2(r))<<std::endl;
|
||||
|
||||
q[peri_kp]=Az;
|
||||
p[peri_kp]=z;
|
||||
|
||||
int northog = ((kp)>(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history.
|
||||
for(int back=0;back<northog;back++){
|
||||
|
||||
int peri_back=(k-back)%mmax; assert((k-back)>=0);
|
||||
|
||||
b=-real(innerProduct(q[peri_back],Az))/qq[peri_back];
|
||||
p[peri_kp]=p[peri_kp]+b*p[peri_back];
|
||||
q[peri_kp]=q[peri_kp]+b*q[peri_back];
|
||||
|
||||
}
|
||||
qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm
|
||||
|
||||
|
||||
}
|
||||
assert(0); // never reached
|
||||
return cp;
|
||||
}
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
#endif
|
473
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
473
Grid/algorithms/iterative/SchurRedBlack.h
Normal file
@ -0,0 +1,473 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/algorithms/iterative/SchurRedBlack.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_SCHUR_RED_BLACK_H
|
||||
#define GRID_SCHUR_RED_BLACK_H
|
||||
|
||||
|
||||
/*
|
||||
* Red black Schur decomposition
|
||||
*
|
||||
* M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo)
|
||||
* (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 )
|
||||
* = L D U
|
||||
*
|
||||
* L^-1 = (1 0 )
|
||||
* (-MoeMee^{-1} 1 )
|
||||
* L^{dag} = ( 1 Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
* L^{-d} = ( 1 -Mee^{-dag} Moe^{dag} )
|
||||
* ( 0 1 )
|
||||
*
|
||||
* U^-1 = (1 -Mee^{-1} Meo)
|
||||
* (0 1 )
|
||||
* U^{dag} = ( 1 0)
|
||||
* (Meo^dag Mee^{-dag} 1)
|
||||
* U^{-dag} = ( 1 0)
|
||||
* (-Meo^dag Mee^{-dag} 1)
|
||||
***********************
|
||||
* M psi = eta
|
||||
***********************
|
||||
*Odd
|
||||
* i) D_oo psi_o = L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Wilson:
|
||||
* (D_oo)^{\dag} D_oo psi_o = (D_oo)^dag L^{-1} eta_o
|
||||
* Stag:
|
||||
* D_oo psi_o = L^{-1} eta = (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* L^-1 eta_o= (1 0 ) (e
|
||||
* (-MoeMee^{-1} 1 )
|
||||
*
|
||||
*Even
|
||||
* ii) Mee psi_e + Meo psi_o = src_e
|
||||
*
|
||||
* => sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
*
|
||||
*
|
||||
* TODO: Other options:
|
||||
*
|
||||
* a) change checkerboards for Schur e<->o
|
||||
*
|
||||
* Left precon by Moo^-1
|
||||
* b) Doo^{dag} M_oo^-dag Moo^-1 Doo psi_0 = (D_oo)^dag M_oo^-dag Moo^-1 L^{-1} eta_o
|
||||
* eta_o' = (D_oo)^dag M_oo^-dag Moo^-1 (eta_o - Moe Mee^{-1} eta_e)
|
||||
*
|
||||
* Right precon by Moo^-1
|
||||
* c) M_oo^-dag Doo^{dag} Doo Moo^-1 phi_0 = M_oo^-dag (D_oo)^dag L^{-1} eta_o
|
||||
* eta_o' = M_oo^-dag (D_oo)^dag (eta_o - Moe Mee^{-1} eta_e)
|
||||
* psi_o = M_oo^-1 phi_o
|
||||
* TODO: Deflation
|
||||
*/
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Use base class to share code
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Take a matrix and form a Red Black solver calling a Herm solver
|
||||
// Use of RB info prevents making SchurRedBlackSolve conform to standard interface
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackBase {
|
||||
protected:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
OperatorFunction<Field> & _HermitianRBSolver;
|
||||
int CBfactorise;
|
||||
bool subGuess;
|
||||
public:
|
||||
|
||||
SchurRedBlackBase(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false) :
|
||||
_HermitianRBSolver(HermitianRBSolver)
|
||||
{
|
||||
CBfactorise = 0;
|
||||
subtractGuess(initSubGuess);
|
||||
};
|
||||
void subtractGuess(const bool initSubGuess)
|
||||
{
|
||||
subGuess = initSubGuess;
|
||||
}
|
||||
bool isSubtractGuess(void)
|
||||
{
|
||||
return subGuess;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Shared code
|
||||
/////////////////////////////////////////////////////////////
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out){
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out)
|
||||
{
|
||||
ZeroGuesser<Field> guess;
|
||||
(*this)(_Matrix,in,out,guess);
|
||||
}
|
||||
|
||||
template<class Guesser>
|
||||
void operator()(Matrix &_Matrix, const std::vector<Field> &in, std::vector<Field> &out,Guesser &guess)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
int nblock = in.size();
|
||||
|
||||
std::vector<Field> src_o(nblock,grid);
|
||||
std::vector<Field> sol_o(nblock,grid);
|
||||
|
||||
std::vector<Field> guess_save;
|
||||
|
||||
Field resid(fgrid);
|
||||
Field tmp(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Prepare RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++){
|
||||
RedBlackSource(_Matrix,in[b],tmp,src_o[b]);
|
||||
}
|
||||
////////////////////////////////////////////////
|
||||
// Make the guesses
|
||||
////////////////////////////////////////////////
|
||||
if ( subGuess ) guess_save.resize(nblock,grid);
|
||||
|
||||
for(int b=0;b<nblock;b++){
|
||||
guess(src_o[b],sol_o[b]);
|
||||
|
||||
if ( subGuess ) {
|
||||
guess_save[b] = sol_o[b];
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the block solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase calling the solver for "<<nblock<<" RHS" <<std::endl;
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// A2A boolean behavioural control & reconstruct other checkerboard
|
||||
////////////////////////////////////////////////
|
||||
for(int b=0;b<nblock;b++) {
|
||||
|
||||
if (subGuess) sol_o[b] = sol_o[b] - guess_save[b];
|
||||
|
||||
///////// Needs even source //////////////
|
||||
pickCheckerboard(Even,tmp,in[b]);
|
||||
RedBlackSolution(_Matrix,sol_o[b],tmp,out[b]);
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Check unprec residual if possible
|
||||
/////////////////////////////////////////////////
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out[b],resid);
|
||||
resid = resid-in[b];
|
||||
RealD ns = norm2(in[b]);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase solver true unprec resid["<<b<<"] "<<std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout<<GridLogMessage<< "SchurRedBlackBase Guess subtracted after solve["<<b<<"] " << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
template<class Guesser>
|
||||
void operator() (Matrix & _Matrix,const Field &in, Field &out,Guesser &guess){
|
||||
|
||||
// FIXME CGdiagonalMee not implemented virtual function
|
||||
// FIXME use CBfactorise to control schur decomp
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field resid(fgrid);
|
||||
Field src_o(grid);
|
||||
Field src_e(grid);
|
||||
Field sol_o(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// RedBlack source
|
||||
////////////////////////////////////////////////
|
||||
RedBlackSource(_Matrix,in,src_e,src_o);
|
||||
|
||||
////////////////////////////////
|
||||
// Construct the guess
|
||||
////////////////////////////////
|
||||
Field tmp(grid);
|
||||
guess(src_o,sol_o);
|
||||
|
||||
Field guess_save(grid);
|
||||
guess_save = sol_o;
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// Call the red-black solver
|
||||
//////////////////////////////////////////////////////////////
|
||||
RedBlackSolve(_Matrix,src_o,sol_o);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Fionn A2A boolean behavioural control
|
||||
////////////////////////////////////////////////
|
||||
if (subGuess) sol_o= sol_o-guess_save;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// RedBlack solution needs the even source
|
||||
///////////////////////////////////////////////////
|
||||
RedBlackSolution(_Matrix,sol_o,src_e,out);
|
||||
|
||||
// Verify the unprec residual
|
||||
if ( ! subGuess ) {
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackBase solver true unprec resid "<< std::sqrt(nr/ns) << std::endl;
|
||||
} else {
|
||||
std::cout << GridLogMessage << "SchurRedBlackBase Guess subtracted after solve." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Override in derived. Not virtual as template methods
|
||||
/////////////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource (Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o) =0;
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) =0;
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)=0;
|
||||
|
||||
};
|
||||
|
||||
template<class Field> class SchurRedBlackStaggeredSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackStaggeredSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess)
|
||||
{
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
_Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm.
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e_c,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e(grid);
|
||||
|
||||
src_e = src_e_c; // Const correctness
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
src_e = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurStaggeredOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
template<class Field> using SchurRedBlackStagSolve = SchurRedBlackStaggeredSolve<Field>;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal has Mooee on it.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagMooeeSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
SchurRedBlackDiagMooeeSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field> (HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Override RedBlack specialisation
|
||||
//////////////////////////////////////////////////////
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||
|
||||
}
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
Field src_e_i(grid);
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
src_e_i = src_e-tmp; assert( src_e_i.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd );
|
||||
}
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagMooeeOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Site diagonal is identity, right preconditioned by Mee^inv
|
||||
// ( 1 - Meo Moo^inv Moe Mee^inv ) phi =( 1 - Meo Moo^inv Moe Mee^inv ) Mee psi = = eta = eta
|
||||
//=> psi = MeeInv phi
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class Field> class SchurRedBlackDiagTwoSolve : public SchurRedBlackBase<Field> {
|
||||
public:
|
||||
typedef CheckerBoardedSparseMatrixBase<Field> Matrix;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Wrap the usual normal equations Schur trick
|
||||
/////////////////////////////////////////////////////
|
||||
SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver, const bool initSubGuess = false)
|
||||
: SchurRedBlackBase<Field>(HermitianRBSolver,initSubGuess) {};
|
||||
|
||||
virtual void RedBlackSource(Matrix & _Matrix,const Field &src, Field &src_e,Field &src_o)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
|
||||
pickCheckerboard(Even,src_e,src);
|
||||
pickCheckerboard(Odd ,src_o,src);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = Mdag * (source_o - Moe MeeInv source_e)
|
||||
/////////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
_Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd);
|
||||
tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd);
|
||||
|
||||
// get the right MpcDag
|
||||
_HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd);
|
||||
}
|
||||
|
||||
virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol)
|
||||
{
|
||||
GridBase *grid = _Matrix.RedBlackGrid();
|
||||
GridBase *fgrid= _Matrix.Grid();
|
||||
|
||||
Field sol_o_i(grid);
|
||||
Field tmp(grid);
|
||||
Field sol_e(grid);
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// MooeeInv due to pecond
|
||||
////////////////////////////////////////////////
|
||||
_Matrix.MooeeInv(sol_o,tmp);
|
||||
sol_o_i = tmp;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
///////////////////////////////////////////////////
|
||||
_Matrix.Meooe(sol_o_i,tmp); assert( tmp.Checkerboard() ==Even);
|
||||
tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even);
|
||||
_Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
|
||||
setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even);
|
||||
setCheckerboard(sol,sol_o_i); assert( sol_o_i.Checkerboard() ==Odd );
|
||||
};
|
||||
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
};
|
||||
virtual void RedBlackSolve (Matrix & _Matrix,const std::vector<Field> &src_o, std::vector<Field> &sol_o)
|
||||
{
|
||||
SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
|
||||
this->_HermitianRBSolver(_HermOpEO,src_o,sol_o);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
Reference in New Issue
Block a user