1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Compare commits

..

No commits in common. "2111e7ab5fbb861e79174a0a400b87ffd12c21a9" and "b58fd80379f48fd06d466b9065793e7d02a8b48c" have entirely different histories.

10 changed files with 1724 additions and 486 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,573 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/GeneralCoarsenedMatrix.h
Copyright (C) 2015
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/QCD.h> // needed for Dagger(Yes|No), Inverse(Yes|No)
#include <Grid/lattice/PaddedCell.h>
#include <Grid/stencil/GeneralLocalStencil.h>
NAMESPACE_BEGIN(Grid);
// Fixme need coalesced read gpermute
template<class vobj> void gpermute(vobj & inout,int perm){
vobj tmp=inout;
if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;}
if (perm & 0x2 ) { permute(inout,tmp,1); tmp=inout;}
if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;}
if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;}
}
/////////////////////////////////////////////////////////////////
// Reuse Aggregation class from CoarsenedMatrix for now
// Might think about *smoothed* Aggregation
// Equivalent of Geometry class in cartesian case
/////////////////////////////////////////////////////////////////
class NonLocalStencilGeometry {
public:
int depth;
int hops;
int npoint;
std::vector<Coordinate> shifts;
Coordinate stencil_size;
Coordinate stencil_lo;
Coordinate stencil_hi;
GridCartesian *grid;
GridCartesian *Grid() {return grid;};
int Depth(void){return 1;}; // Ghost zone depth
int Hops(void){return hops;}; // # of hops=> level of corner fill in in stencil
virtual int DimSkip(void) =0;
virtual ~NonLocalStencilGeometry() {};
int Reverse(int point)
{
int Nd = Grid()->Nd();
Coordinate shft = shifts[point];
Coordinate rev(Nd);
for(int mu=0;mu<Nd;mu++) rev[mu]= -shft[mu];
for(int p=0;p<npoint;p++){
if(rev==shifts[p]){
return p;
}
}
assert(0);
return -1;
}
void BuildShifts(void)
{
this->shifts.resize(0);
int Nd = this->grid->Nd();
int dd = this->DimSkip();
for(int s0=this->stencil_lo[dd+0];s0<=this->stencil_hi[dd+0];s0++){
for(int s1=this->stencil_lo[dd+1];s1<=this->stencil_hi[dd+1];s1++){
for(int s2=this->stencil_lo[dd+2];s2<=this->stencil_hi[dd+2];s2++){
for(int s3=this->stencil_lo[dd+3];s3<=this->stencil_hi[dd+3];s3++){
Coordinate sft(Nd,0);
sft[dd+0] = s0;
sft[dd+1] = s1;
sft[dd+2] = s2;
sft[dd+3] = s3;
int nhops = abs(s0)+abs(s1)+abs(s2)+abs(s3);
if(nhops<=this->hops) this->shifts.push_back(sft);
}}}}
this->npoint = this->shifts.size();
std::cout << GridLogMessage << "NonLocalStencilGeometry has "<< this->npoint << " terms in stencil "<<std::endl;
}
NonLocalStencilGeometry(GridCartesian *_coarse_grid,int _hops) : grid(_coarse_grid), hops(_hops)
{
Coordinate latt = grid->GlobalDimensions();
stencil_size.resize(grid->Nd());
stencil_lo.resize(grid->Nd());
stencil_hi.resize(grid->Nd());
for(int d=0;d<grid->Nd();d++){
if ( latt[d] == 1 ) {
stencil_lo[d] = 0;
stencil_hi[d] = 0;
stencil_size[d]= 1;
} else if ( latt[d] == 2 ) {
stencil_lo[d] = -1;
stencil_hi[d] = 0;
stencil_size[d]= 2;
} else if ( latt[d] > 2 ) {
stencil_lo[d] = -1;
stencil_hi[d] = 1;
stencil_size[d]= 3;
}
}
};
};
// Need to worry about red-black now
class NonLocalStencilGeometry4D : public NonLocalStencilGeometry {
public:
virtual int DimSkip(void) { return 0;};
NonLocalStencilGeometry4D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { };
virtual ~NonLocalStencilGeometry4D() {};
};
class NonLocalStencilGeometry5D : public NonLocalStencilGeometry {
public:
virtual int DimSkip(void) { return 1; };
NonLocalStencilGeometry5D(GridCartesian *Coarse,int _hops) : NonLocalStencilGeometry(Coarse,_hops) { };
virtual ~NonLocalStencilGeometry5D() {};
};
/*
* Bunch of different options classes
*/
class NextToNextToNextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D {
public:
NextToNextToNextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,4)
{
this->BuildShifts();
};
};
class NextToNextToNextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D {
public:
NextToNextToNextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,4)
{
this->BuildShifts();
};
};
class NextToNearestStencilGeometry4D : public NonLocalStencilGeometry4D {
public:
NextToNearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,2)
{
this->BuildShifts();
};
};
class NextToNearestStencilGeometry5D : public NonLocalStencilGeometry5D {
public:
NextToNearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,2)
{
this->BuildShifts();
};
};
class NearestStencilGeometry4D : public NonLocalStencilGeometry4D {
public:
NearestStencilGeometry4D(GridCartesian *Coarse) : NonLocalStencilGeometry4D(Coarse,1)
{
this->BuildShifts();
};
};
class NearestStencilGeometry5D : public NonLocalStencilGeometry5D {
public:
NearestStencilGeometry5D(GridCartesian *Coarse) : NonLocalStencilGeometry5D(Coarse,1)
{
this->BuildShifts();
};
};
// Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis>
class GeneralCoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
public:
typedef GeneralCoarsenedMatrix<Fobj,CComplex,nbasis> GeneralCoarseOp;
typedef iVector<CComplex,nbasis > siteVector;
typedef iMatrix<CComplex,nbasis > siteMatrix;
typedef Lattice<iScalar<CComplex> > CoarseComplexField;
typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef iMatrix<CComplex,nbasis > Cobj;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField;
typedef CoarseVector Field;
////////////////////
// Data members
////////////////////
int hermitian;
GridBase * _FineGrid;
GridCartesian * _CoarseGrid;
NonLocalStencilGeometry &geom;
PaddedCell Cell;
GeneralLocalStencil Stencil;
std::vector<CoarseMatrix> _A;
std::vector<CoarseMatrix> _Adag;
///////////////////////
// Interface
///////////////////////
GridBase * Grid(void) { return _FineGrid; }; // this is all the linalg routines need to know
GridBase * FineGrid(void) { return _FineGrid; }; // this is all the linalg routines need to know
GridCartesian * CoarseGrid(void) { return _CoarseGrid; }; // this is all the linalg routines need to know
void ProjectNearestNeighbour(RealD shift, GeneralCoarseOp &CopyMe)
{
int nfound=0;
std::cout << " ProjectNearestNeighbour "<< CopyMe._A[0].Grid()<<std::endl;
for(int p=0;p<geom.npoint;p++){
for(int pp=0;pp<CopyMe.geom.npoint;pp++){
// Search for the same relative shift
// Avoids brutal handling of Grid pointers
if ( CopyMe.geom.shifts[pp]==geom.shifts[p] ) {
_A[p] = CopyMe.Cell.Extract(CopyMe._A[pp]);
_Adag[p] = CopyMe.Cell.Extract(CopyMe._Adag[pp]);
nfound++;
}
}
}
assert(nfound==geom.npoint);
ExchangeCoarseLinks();
}
GeneralCoarsenedMatrix(NonLocalStencilGeometry &_geom,GridBase *FineGrid, GridCartesian * CoarseGrid)
: geom(_geom),
_FineGrid(FineGrid),
_CoarseGrid(CoarseGrid),
hermitian(1),
Cell(_geom.Depth(),_CoarseGrid),
Stencil(Cell.grids.back(),geom.shifts)
{
{
int npoint = _geom.npoint;
autoView( Stencil_v , Stencil, AcceleratorRead);
int osites=Stencil.Grid()->oSites();
for(int ss=0;ss<osites;ss++){
for(int point=0;point<npoint;point++){
auto SE = Stencil_v.GetEntry(point,ss);
int o = SE->_offset;
assert( o< osites);
}
}
}
_A.resize(geom.npoint,CoarseGrid);
_Adag.resize(geom.npoint,CoarseGrid);
}
void M (const CoarseVector &in, CoarseVector &out)
{
Mult(_A,in,out);
}
void Mdag (const CoarseVector &in, CoarseVector &out)
{
if ( hermitian ) M(in,out);
else Mult(_Adag,in,out);
}
void Mult (std::vector<CoarseMatrix> &A,const CoarseVector &in, CoarseVector &out)
{
RealD tviews=0;
RealD ttot=0;
RealD tmult=0;
RealD texch=0;
RealD text=0;
ttot=-usecond();
conformable(CoarseGrid(),in.Grid());
conformable(in.Grid(),out.Grid());
out.Checkerboard() = in.Checkerboard();
CoarseVector tin=in;
texch-=usecond();
CoarseVector pin = Cell.Exchange(tin);
texch+=usecond();
CoarseVector pout(pin.Grid()); pout=Zero();
int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview;
const int Nsimd = CComplex::Nsimd();
int osites=pin.Grid()->oSites();
// int gsites=pin.Grid()->gSites();
RealD flops = 1.0* npoint * nbasis * nbasis * 8 * osites;
RealD bytes = (1.0*osites*sizeof(siteMatrix)*npoint+2.0*osites*sizeof(siteVector))*npoint;
// for(int point=0;point<npoint;point++){
// conformable(A[point],pin);
// }
{
tviews-=usecond();
autoView( in_v , pin, AcceleratorRead);
autoView( out_v , pout, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
tviews+=usecond();
for(int point=0;point<npoint;point++){
tviews-=usecond();
autoView( A_v, A[point],AcceleratorRead);
tviews+=usecond();
tmult-=usecond();
accelerator_for(sss, osites*nbasis, Nsimd, {
typedef decltype(coalescedRead(in_v[0])) calcVector;
int ss = sss/nbasis;
int b = sss%nbasis;
auto SE = Stencil_v.GetEntry(point,ss);
auto nbr = coalescedReadGeneralPermute(in_v[SE->_offset],SE->_permute,Nd);
auto res = out_v(ss)(b);
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(A_v[ss](b,bb))*nbr(bb);
}
coalescedWrite(out_v[ss](b),res);
});
tmult+=usecond();
}
}
text-=usecond();
out = Cell.Extract(pout);
text+=usecond();
ttot+=usecond();
std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult ext "<<text<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Kernel "<< flops/tmult<<" mflop/s"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Kernel "<< bytes/tmult<<" MB/s"<<std::endl;
std::cout << GridLogPerformance<<"Coarse flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
std::cout << GridLogPerformance<<"Coarse bytes "<< bytes/1e6<<" MB"<<std::endl;
};
void PopulateAdag(void)
{
for(int64_t bidx=0;bidx<CoarseGrid()->gSites() ;bidx++){
Coordinate bcoor;
CoarseGrid()->GlobalIndexToGlobalCoor(bidx,bcoor);
for(int p=0;p<geom.npoint;p++){
Coordinate scoor = bcoor;
for(int mu=0;mu<bcoor.size();mu++){
int L = CoarseGrid()->GlobalDimensions()[mu];
scoor[mu] = (bcoor[mu] - geom.shifts[p][mu] + L) % L; // Modulo arithmetic
}
// Flip to poke/peekLocalSite and not too bad
auto link = peekSite(_A[p],scoor);
int pp = geom.Reverse(p);
pokeSite(adj(link),_Adag[pp],bcoor);
}
}
}
/////////////////////////////////////////////////////////////
//
// A) Only reduced flops option is to use a padded cell of depth 4
// and apply MpcDagMpc in the padded cell.
//
// Makes for ONE application of MpcDagMpc per vector instead of 30 or 80.
// With the effective cell size around (B+8)^4 perhaps 12^4/4^4 ratio
// Cost is 81x more, same as stencil size.
//
// But: can eliminate comms and do as local dirichlet.
//
// Local exchange gauge field once.
// Apply to all vectors, local only computation.
// Must exchange ghost subcells in reverse process of PaddedCell to take inner products
//
// B) Can reduce cost: pad by 1, apply Deo (4^4+6^4+8^4+8^4 )/ (4x 4^4)
// pad by 2, apply Doe
// pad by 3, apply Deo
// then break out 8x directions; cost is ~10x MpcDagMpc per vector
//
// => almost factor of 10 in setup cost, excluding data rearrangement
//
// Intermediates -- ignore the corner terms, leave approximate and force Hermitian
// Intermediates -- pad by 2 and apply 1+8+24 = 33 times.
/////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
// BFM HDCG style approach: Solve a system of equations to get Aij
//////////////////////////////////////////////////////////
/*
* Here, k,l index which possible shift within the 3^Nd "ball" connected by MdagM.
*
* conj(phases[block]) proj[k][ block*Nvec+j ] = \sum_ball e^{i q_k . delta} < phi_{block,j} | MdagM | phi_{(block+delta),i} >
* = \sum_ball e^{iqk.delta} A_ji
*
* Must invert matrix M_k,l = e^[i q_k . delta_l]
*
* Where q_k = delta_k . (2*M_PI/global_nb[mu])
*/
void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace)
{
std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
GridBase *grid = FineGrid();
RealD tproj=0.0;
RealD teigen=0.0;
RealD tmat=0.0;
RealD tphase=0.0;
RealD tinv=0.0;
/////////////////////////////////////////////////////////////
// Orthogonalise the subblocks over the basis
/////////////////////////////////////////////////////////////
CoarseScalar InnerProd(CoarseGrid());
blockOrthogonalise(InnerProd,Subspace.subspace);
const int npoint = geom.npoint;
Coordinate clatt = CoarseGrid()->GlobalDimensions();
int Nd = CoarseGrid()->Nd();
/*
* Here, k,l index which possible momentum/shift within the N-points connected by MdagM.
* Matrix index i is mapped to this shift via
* geom.shifts[i]
*
* conj(pha[block]) proj[k (which mom)][j (basis vec cpt)][block]
* = \sum_{l in ball} e^{i q_k . delta_l} < phi_{block,j} | MdagM | phi_{(block+delta_l),i} >
* = \sum_{l in ball} e^{iqk.delta_l} A_ji^{b.b+l}
* = M_{kl} A_ji^{b.b+l}
*
* Must assemble and invert matrix M_k,l = e^[i q_k . delta_l]
*
* Where q_k = delta_k . (2*M_PI/global_nb[mu])
*
* Then A{ji}^{b,b+l} = M^{-1}_{lm} ComputeProj_{m,b,i,j}
*/
teigen-=usecond();
Eigen::MatrixXcd Mkl = Eigen::MatrixXcd::Zero(npoint,npoint);
Eigen::MatrixXcd invMkl = Eigen::MatrixXcd::Zero(npoint,npoint);
ComplexD ci(0.0,1.0);
for(int k=0;k<npoint;k++){ // Loop over momenta
for(int l=0;l<npoint;l++){ // Loop over nbr relative
ComplexD phase(0.0,0.0);
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ clatt[mu];
phase=phase+TwoPiL*geom.shifts[k][mu]*geom.shifts[l][mu];
}
phase=exp(phase*ci);
Mkl(k,l) = phase;
}
}
invMkl = Mkl.inverse();
teigen+=usecond();
///////////////////////////////////////////////////////////////////////
// Now compute the matrix elements of linop between the orthonormal
// set of vectors.
///////////////////////////////////////////////////////////////////////
FineField phaV(grid); // Phased block basis vector
FineField MphaV(grid);// Matrix applied
CoarseVector coarseInner(CoarseGrid());
std::vector<CoarseVector> ComputeProj(npoint,CoarseGrid());
std::vector<CoarseVector> FT(npoint,CoarseGrid());
for(int i=0;i<nbasis;i++){// Loop over basis vectors
std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
/////////////////////////////////////////////////////
// Stick a phase on every block
/////////////////////////////////////////////////////
tphase-=usecond();
CoarseComplexField coor(CoarseGrid());
CoarseComplexField pha(CoarseGrid()); pha=Zero();
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
RealD TwoPiL = M_PI * 2.0/ clatt[mu];
pha = pha + (TwoPiL * geom.shifts[p][mu]) * coor;
}
pha =exp(pha*ci);
phaV=Zero();
blockZAXPY(phaV,pha,Subspace.subspace[i],phaV);
tphase+=usecond();
/////////////////////////////////////////////////////////////////////
// Multiple phased subspace vector by matrix and project to subspace
// Remove local bulk phase to leave relative phases
/////////////////////////////////////////////////////////////////////
tmat-=usecond();
linop.Op(phaV,MphaV);
tmat+=usecond();
tproj-=usecond();
blockProject(coarseInner,MphaV,Subspace.subspace);
coarseInner = conjugate(pha) * coarseInner;
ComputeProj[p] = coarseInner;
tproj+=usecond();
}
tinv-=usecond();
for(int k=0;k<npoint;k++){
FT[k] = Zero();
for(int l=0;l<npoint;l++){
FT[k]= FT[k]+ invMkl(l,k)*ComputeProj[l];
}
int osites=CoarseGrid()->oSites();
autoView( A_v , _A[k], AcceleratorWrite);
autoView( FT_v , FT[k], AcceleratorRead);
accelerator_for(sss, osites, 1, {
for(int j=0;j<nbasis;j++){
A_v[sss](j,i) = FT_v[sss](j);
}
});
}
tinv+=usecond();
}
for(int p=0;p<geom.npoint;p++){
Coordinate coor({0,0,0,0,0});
auto sval = peekSite(_A[p],coor);
}
// Only needed if nonhermitian
if ( ! hermitian ) {
std::cout << GridLogMessage<<"PopulateAdag "<<std::endl;
PopulateAdag();
}
// Need to write something to populate Adag from A
std::cout << GridLogMessage<<"ExchangeCoarseLinks "<<std::endl;
ExchangeCoarseLinks();
std::cout << GridLogMessage<<"CoarsenOperator eigen "<<teigen<<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator phase "<<tphase<<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator mat "<<tmat <<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator proj "<<tproj<<" us"<<std::endl;
std::cout << GridLogMessage<<"CoarsenOperator inv "<<tinv<<" us"<<std::endl;
}
void ExchangeCoarseLinks(void){
for(int p=0;p<geom.npoint;p++){
std::cout << "Exchange "<<p<<std::endl;
_A[p] = Cell.Exchange(_A[p]);
_Adag[p]= Cell.Exchange(_Adag[p]);
}
}
virtual void Mdiag (const Field &in, Field &out){ assert(0);};
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);};
virtual void MdirAll (const Field &in, std::vector<Field> &out){assert(0);};
};
NAMESPACE_END(Grid);

View File

@ -38,6 +38,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
* Vstart = P^Tx + Qb
* M1 = P^TM + Q
* M2=M3=1
* Vout = x
*/
NAMESPACE_BEGIN(Grid);
@ -67,13 +68,14 @@ class TwoLevelCG : public LinearFunction<Field>
grid = fine;
};
virtual void operator() (const Field &src, Field &x)
virtual void operator() (const Field &src, Field &psi)
{
Field resid(grid);
RealD f;
RealD rtzp,rtz,a,d,b;
RealD rptzp;
Field x(grid);
Field p(grid);
Field z(grid);
Field tmp(grid);
@ -83,6 +85,7 @@ class TwoLevelCG : public LinearFunction<Field>
Field rp (grid);
//Initial residual computation & set up
RealD guess = norm2(psi);
double tn;
GridStopWatch HDCGTimer;
@ -162,22 +165,14 @@ class TwoLevelCG : public LinearFunction<Field>
RealD srcnorm = sqrt(norm2(src));
RealD tmpnorm = sqrt(norm2(tmp));
RealD true_residual = tmpnorm/srcnorm;
std::cout<<GridLogMessage
<<"HDCG: true residual is "<<true_residual
<<" solution "<<xnorm
<<" source "<<srcnorm
<<" mmp "<<mmpnorm
<<std::endl;
std::cout<<GridLogMessage<<"HDCG: true residual is "<<true_residual
<<" solution "<<xnorm<<" source "<<srcnorm<<std::endl;
return;
}
}
std::cout<<GridLogMessage<<"HDCG: not converged"<<std::endl;
RealD xnorm = sqrt(norm2(x));
RealD srcnorm = sqrt(norm2(src));
std::cout<<GridLogMessage<<"HDCG: non-converged solution "<<xnorm<<" source "<<srcnorm<<std::endl;
std::cout << "HDCG: Pcg not converged"<<std::endl;
return ;
}
@ -202,6 +197,9 @@ class TwoLevelCG : public LinearFunction<Field>
/////////////////////////////////////////////////////////////////////
// Only Def1 has non-trivial Vout.
/////////////////////////////////////////////////////////////////////
virtual void Vout (Field & in, Field & out,Field & src){
out = in;
}
};
@ -223,13 +221,13 @@ class TwoLevelADEF2 : public TwoLevelCG<Field>
// more most opertor functions
TwoLevelADEF2(RealD tol,
Integer maxit,
LinearOperatorBase<Field> &FineLinop,
LinearFunction<Field> &Smoother,
LinearOperatorBase<Field> &FineLinop,
LinearFunction<Field> &Smoother,
LinearFunction<CoarseField> &CoarseSolver,
LinearFunction<CoarseField> &CoarseSolverPrecise,
Aggregation &Aggregates
) :
TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid),
TwoLevelCG<Field>(tol,maxit,FineLinop,Smoother,Aggregates.FineGrid),
_CoarseSolver(CoarseSolver),
_CoarseSolverPrecise(CoarseSolverPrecise),
_Aggregates(Aggregates)
@ -323,13 +321,12 @@ public:
eval(_eval)
{};
// Can just inherit existing Vout
// Can just inherit existing M2
// Can just inherit existing M3
// Simple vstart - do nothing
virtual void Vstart(Field & x,const Field & src){
x=src; // Could apply Q
};
virtual void Vstart(Field & x,const Field & src){ x=src; };
// Override PcgM1
virtual void PcgM1(Field & in, Field & out)

View File

@ -457,7 +457,7 @@ until convergence
std::vector<Field>& evec,
Field& w,int Nm,int k)
{
std::cout<<GridLogDebug << "Lanczos step " <<k<<std::endl;
std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
const RealD tiny = 1.0e-20;
assert( k< Nm );
@ -487,7 +487,7 @@ until convergence
if(k < Nm-1) evec[k+1] = w;
std::cout<<GridLogIRL << "Lanczos step alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;

View File

@ -78,7 +78,7 @@ public:
void operator() (const Field &in, Field &out){
_Guess(in,out);
_HermitianSolver(_Matrix,in,out); //M out = in
_HermitianSolver(_Matrix,in,out); // Mdag M out = Mdag in
}
};

View File

@ -191,17 +191,16 @@ public:
out = Cell.Extract(pout);
text+=usecond();
ttot+=usecond();
std::cout << GridLogDebug<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
std::cout << GridLogDebug<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
std::cout << GridLogDebug<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
std::cout << GridLogDebug<<"Coarse Mult ext "<<text<<" us"<<std::endl;
std::cout << GridLogDebug<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
std::cout << GridLogDebug<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl;
std::cout << GridLogDebug<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl;
std::cout << GridLogDebug<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
std::cout << GridLogDebug<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult Aviews "<<tviews<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult exch "<<texch<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult mult "<<tmult<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult ext "<<text<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Mult tot "<<ttot<<" us"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Kernel flop/s "<< flops/tmult<<" mflop/s"<<std::endl;
std::cout << GridLogPerformance<<"Coarse Kernel bytes/s"<< bytes/tmult<<" MB/s"<<std::endl;
std::cout << GridLogPerformance<<"Coarse overall flops/s "<< flops/ttot<<" mflop/s"<<std::endl;
std::cout << GridLogPerformance<<"Coarse total bytes "<< bytes/1e6<<" MB"<<std::endl;
};
void PopulateAdag(void)

View File

@ -139,7 +139,7 @@ public:
if(dim==0) conformable(old_grid,unpadded_grid);
else conformable(old_grid,grids[dim-1]);
// std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
std::cout << " dim "<<dim<<" local "<<local << " padding to "<<plocal<<std::endl;
double tins=0, tshift=0;
@ -184,7 +184,7 @@ public:
}
tins += usecond() - t;
}
std::cout << GridLogDebug << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
std::cout << GridLogPerformance << "PaddedCell::Expand timings: cshift:" << tshift/1000 << "ms, insert-slice:" << tins/1000 << "ms" << std::endl;
return padded;
}

View File

@ -50,7 +50,7 @@ void SaveOperator(Coarsened &Operator,std::string file)
#endif
}
template<class Coarsened>
void LoadOperator(Coarsened &Operator,std::string file)
void LoadOperator(Coarsened Operator,std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
@ -219,7 +219,7 @@ int main (int argc, char ** argv)
////////////////////////////////////////////////////////////
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
bool load=true;
bool load=false;
if ( load ) {
LoadBasis(Aggregates,"Subspace.scidac");
LoadOperator(LittleDiracOp,"LittleDiracOp.scidac");
@ -230,7 +230,7 @@ int main (int argc, char ** argv)
// 600,200,200 -- 38 iters, 162s
// 600,200,100 -- 38 iters, 169s
// 600,200,50 -- 88 iters. 370s
800,
600,
200,
100,
0.0);
@ -241,16 +241,16 @@ int main (int argc, char ** argv)
// Try projecting to one hop only
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
LittleDiracOpProj.ProjectNearestNeighbour(0.2,LittleDiracOp);
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
HermMatrix CoarseOp (LittleDiracOp);
HermMatrix CoarseOp (LittleDiracOp);
HermMatrix CoarseOpProj (LittleDiracOpProj);
//////////////////////////////////////////
// Build a coarse lanczos
//////////////////////////////////////////
Chebyshev<CoarseVector> IRLCheby(0.2,40.0,71); // 1 iter
Chebyshev<CoarseVector> IRLCheby(0.5,60.0,71); // 1 iter
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp);
PlainHermOp<CoarseVector> IRLOp (CoarseOp);
int Nk=48;
@ -270,6 +270,7 @@ int main (int argc, char ** argv)
IRL.calc(eval,evec,c_src,Nconv);
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
//////////////////////////////////////////
// Build a coarse space solver
//////////////////////////////////////////
@ -282,8 +283,7 @@ int main (int argc, char ** argv)
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
c_res=Zero();
HPDSolve(c_src,c_res); c_ref = c_res;
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl;
//////////////////////////////////////////////////////////////////////////
// Deflated (with real op EV's) solve for the projected coarse op
// Work towards ADEF1 in the coarse space
@ -291,21 +291,13 @@ int main (int argc, char ** argv)
HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
c_res=Zero();
HPDSolveProj(c_src,c_res);
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl;
c_res = c_res - c_ref;
std::cout << "Projected solver error "<<norm2(c_res)<<std::endl;
//////////////////////////////////////////////////////////////////////
// Coarse ADEF1 with deflation space
//////////////////////////////////////////////////////////////////////
ChebyshevSmoother<CoarseVector,HermMatrix >
CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
// CoarseSmoother(0.1,37.,8,CoarseOpProj); //
// CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s
// CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s
// CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter
// CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter
ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(4.0,45.,16,CoarseOpProj); // 311
// ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj); // 311
////////////////////////////////////////////////////////
@ -326,25 +318,19 @@ int main (int argc, char ** argv)
// HDCG 38 iters 169s
TwoLevelADEF1defl<CoarseVector>
cADEF1(1.0e-8, 500,
cADEF1(1.0e-8, 100,
CoarseOp,
CoarseSmoother,
evec,eval);
c_res=Zero();
cADEF1(c_src,c_res);
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
c_res = c_res - c_ref;
std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
// cADEF1.Tolerance = 4.0e-2;
// cADEF1.Tolerance = 1.0e-1;
cADEF1.Tolerance = 5.0e-2;
cADEF1.Tolerance = 1.0e-9;
c_res=Zero();
cADEF1(c_src,c_res);
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
c_res = c_res - c_ref;
std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
@ -389,13 +375,16 @@ int main (int argc, char ** argv)
// Build a HDCG solver
//////////////////////////////////////////
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCG(1.0e-8, 100,
HDCG(1.0e-8, 3000,
FineHermOp,
Smoother,
HPDSolveSloppy,
HPDSolve,
Aggregates);
result=Zero();
HDCG(src,result);
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCGdefl(1.0e-8, 100,
FineHermOp,
@ -406,10 +395,6 @@ int main (int argc, char ** argv)
result=Zero();
HDCGdefl(src,result);
result=Zero();
HDCG(src,result);
}
}

View File

@ -1,423 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_general_coarse_hdcg.cc
Copyright (C) 2023
Author: Peter Boyle <pboyle@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/lattice/PaddedCell.h>
#include <Grid/stencil/GeneralLocalStencil.h>
//#include <Grid/algorithms/GeneralCoarsenedMatrix.h>
#include <Grid/algorithms/iterative/AdefGeneric.h>
using namespace std;
using namespace Grid;
template<class Coarsened>
void SaveOperator(Coarsened &Operator,std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
ScidacWriter WR(Operator.Grid()->IsBoss());
assert(Operator._A.size()==Operator.geom.npoint);
WR.open(file);
for(int p=0;p<Operator._A.size();p++){
auto tmp = Operator.Cell.Extract(Operator._A[p]);
WR.writeScidacFieldRecord(tmp,record);
}
WR.close();
#endif
}
template<class Coarsened>
void LoadOperator(Coarsened &Operator,std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
Grid::ScidacReader RD ;
RD.open(file);
assert(Operator._A.size()==Operator.geom.npoint);
for(int p=0;p<Operator.geom.npoint;p++){
conformable(Operator._A[p].Grid(),Operator.CoarseGrid());
RD.readScidacFieldRecord(Operator._A[p],record);
}
RD.close();
Operator.ExchangeCoarseLinks();
#endif
}
template<class aggregation>
void SaveBasis(aggregation &Agg,std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
ScidacWriter WR(Agg.FineGrid->IsBoss());
WR.open(file);
for(int b=0;b<Agg.subspace.size();b++){
WR.writeScidacFieldRecord(Agg.subspace[b],record);
}
WR.close();
#endif
}
template<class aggregation>
void LoadBasis(aggregation &Agg, std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
ScidacReader RD ;
RD.open(file);
for(int b=0;b<Agg.subspace.size();b++){
RD.readScidacFieldRecord(Agg.subspace[b],record);
}
RD.close();
#endif
}
template<class Field> class TestSolver : public LinearFunction<Field> {
public:
TestSolver() {};
void operator() (const Field &in, Field &out){ out = Zero(); }
};
RealD InverseApproximation(RealD x){
return 1.0/x;
}
// Want Op in CoarsenOp to call MatPcDagMatPc
template<class Field>
class HermOpAdaptor : public LinearOperatorBase<Field>
{
LinearOperatorBase<Field> & wrapped;
public:
HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme) {};
void Op (const Field &in, Field &out) { wrapped.HermOp(in,out); }
void HermOp(const Field &in, Field &out) { wrapped.HermOp(in,out); }
void AdjOp (const Field &in, Field &out){ wrapped.HermOp(in,out); }
void OpDiag (const Field &in, Field &out) { assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out) { assert(0); };
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
};
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
FineOperator & _SmootherOperator;
Chebyshev<Field> Cheby;
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) :
_SmootherOperator(SmootherOperator),
Cheby(_lo,_hi,_ord,InverseApproximation)
{
std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl;
};
void operator() (const Field &in, Field &out)
{
Field tmp(in.Grid());
tmp = in;
Cheby(_SmootherOperator,tmp,out);
}
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
const int Ls=24;
const int nbasis = 40;
const int cb = 0 ;
RealD mass=0.00078;
RealD M5=1.8;
RealD b=1.5;
RealD c=0.5;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
// Construct a coarsened grid with 4^4 cell
Coordinate clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/4;
}
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt,
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());;
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
///////////////////////// RNGs /////////////////////////////////
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::vector<int> cseeds({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
///////////////////////// Configuration /////////////////////////////////
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
std::string file("ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file);
//////////////////////// Fermion action //////////////////////////////////
MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);
typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix;
HermFineMatrix FineHermOp(HermOpEO);
LatticeFermion result(FrbGrid); result=Zero();
LatticeFermion src(FrbGrid); random(RNG5,src);
// Run power method on FineHermOp
PowerMethod<LatticeFermion> PM; PM(HermOpEO,src);
////////////////////////////////////////////////////////////
///////////// Coarse basis and Little Dirac Operator ///////
////////////////////////////////////////////////////////////
typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
typedef LittleDiracOperator::CoarseVector CoarseVector;
NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d);
NearestStencilGeometry5D geom_nn(Coarse5d);
// Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
Subspace Aggregates(Coarse5d,FrbGrid,cb);
////////////////////////////////////////////////////////////
// Need to check about red-black grid coarsening
////////////////////////////////////////////////////////////
LittleDiracOperator LittleDiracOp(geom,FrbGrid,Coarse5d);
bool load=true;
if ( load ) {
LoadBasis(Aggregates,"Subspace.scidac");
LoadOperator(LittleDiracOp,"LittleDiracOp.scidac");
} else {
Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,
95.0,0.1,
// 400,200,200 -- 48 iters
// 600,200,200 -- 38 iters, 162s
// 600,200,100 -- 38 iters, 169s
// 600,200,50 -- 88 iters. 370s
800,
200,
100,
0.0);
LittleDiracOp.CoarsenOperator(FineHermOp,Aggregates);
SaveBasis(Aggregates,"Subspace.scidac");
SaveOperator(LittleDiracOp,"LittleDiracOp.scidac");
}
// Try projecting to one hop only
LittleDiracOperator LittleDiracOpProj(geom_nn,FrbGrid,Coarse5d);
LittleDiracOpProj.ProjectNearestNeighbour(0.01,LittleDiracOp); // smaller shift 0.02? n
typedef HermitianLinearOperator<LittleDiracOperator,CoarseVector> HermMatrix;
HermMatrix CoarseOp (LittleDiracOp);
HermMatrix CoarseOpProj (LittleDiracOpProj);
//////////////////////////////////////////
// Build a coarse lanczos
//////////////////////////////////////////
Chebyshev<CoarseVector> IRLCheby(0.2,40.0,71); // 1 iter
FunctionHermOp<CoarseVector> IRLOpCheby(IRLCheby,CoarseOp);
PlainHermOp<CoarseVector> IRLOp (CoarseOp);
int Nk=48;
int Nm=64;
int Nstop=Nk;
ImplicitlyRestartedLanczos<CoarseVector> IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-5,20);
int Nconv;
std::vector<RealD> eval(Nm);
std::vector<CoarseVector> evec(Nm,Coarse5d);
CoarseVector c_src(Coarse5d); c_src=1.0;
CoarseVector c_res(Coarse5d);
CoarseVector c_ref(Coarse5d);
PowerMethod<CoarseVector> cPM; cPM(CoarseOp,c_src);
IRL.calc(eval,evec,c_src,Nconv);
DeflatedGuesser<CoarseVector> DeflCoarseGuesser(evec,eval);
//////////////////////////////////////////
// Build a coarse space solver
//////////////////////////////////////////
int maxit=20000;
ConjugateGradient<CoarseVector> CG(1.0e-8,maxit,false);
ConjugateGradient<LatticeFermionD> CGfine(1.0e-8,10000,false);
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
// HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,CoarseZeroGuesser);
HPDSolver<CoarseVector> HPDSolve(CoarseOp,CG,DeflCoarseGuesser);
c_res=Zero();
HPDSolve(c_src,c_res); c_ref = c_res;
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"ref norm "<<norm2(c_ref)<<std::endl;
//////////////////////////////////////////////////////////////////////////
// Deflated (with real op EV's) solve for the projected coarse op
// Work towards ADEF1 in the coarse space
//////////////////////////////////////////////////////////////////////////
HPDSolver<CoarseVector> HPDSolveProj(CoarseOpProj,CG,DeflCoarseGuesser);
c_res=Zero();
HPDSolveProj(c_src,c_res);
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"res norm "<<norm2(c_res)<<std::endl;
c_res = c_res - c_ref;
std::cout << "Projected solver error "<<norm2(c_res)<<std::endl;
//////////////////////////////////////////////////////////////////////
// Coarse ADEF1 with deflation space
//////////////////////////////////////////////////////////////////////
ChebyshevSmoother<CoarseVector,HermMatrix >
CoarseSmoother(1.0,37.,8,CoarseOpProj); // just go to sloppy 0.1 convergence
// CoarseSmoother(0.1,37.,8,CoarseOpProj); //
// CoarseSmoother(0.5,37.,6,CoarseOpProj); // 8 iter 0.36s
// CoarseSmoother(0.5,37.,12,CoarseOpProj); // 8 iter, 0.55s
// CoarseSmoother(0.5,37.,8,CoarseOpProj);// 7-9 iter
// CoarseSmoother(1.0,37.,8,CoarseOpProj); // 0.4 - 0.5s solve to 0.04, 7-9 iter
// ChebyshevSmoother<CoarseVector,HermMatrix > CoarseSmoother(0.5,36.,10,CoarseOpProj); // 311
////////////////////////////////////////////////////////
// CG, Cheby mode spacing 200,200
// Unprojected Coarse CG solve to 1e-8 : 190 iters, 4.9s
// Unprojected Coarse CG solve to 4e-2 : 33 iters, 0.8s
// Projected Coarse CG solve to 1e-8 : 100 iters, 0.36s
////////////////////////////////////////////////////////
// CoarseSmoother(1.0,48.,8,CoarseOpProj); 48 evecs
////////////////////////////////////////////////////////
// ADEF1 Coarse solve to 1e-8 : 44 iters, 2.34s 2.1x gain
// ADEF1 Coarse solve to 4e-2 : 7 iters, 0.4s
// HDCG 38 iters 162s
//
// CoarseSmoother(1.0,40.,8,CoarseOpProj); 48 evecs
// ADEF1 Coarse solve to 1e-8 : 37 iters, 2.0s 2.1x gain
// ADEF1 Coarse solve to 4e-2 : 6 iters, 0.36s
// HDCG 38 iters 169s
TwoLevelADEF1defl<CoarseVector>
cADEF1(1.0e-8, 500,
CoarseOp,
CoarseSmoother,
evec,eval);
c_res=Zero();
cADEF1(c_src,c_res);
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
c_res = c_res - c_ref;
std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
// cADEF1.Tolerance = 4.0e-2;
// cADEF1.Tolerance = 1.0e-1;
cADEF1.Tolerance = 5.0e-2;
c_res=Zero();
cADEF1(c_src,c_res);
std::cout << GridLogMessage<<"src norm "<<norm2(c_src)<<std::endl;
std::cout << GridLogMessage<<"cADEF1 res norm "<<norm2(c_res)<<std::endl;
c_res = c_res - c_ref;
std::cout << "cADEF1 solver error "<<norm2(c_res)<<std::endl;
//////////////////////////////////////////
// Build a smoother
//////////////////////////////////////////
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(10.0,100.0,10,FineHermOp); //499
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(3.0,100.0,10,FineHermOp); //383
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(1.0,100.0,10,FineHermOp); //328
// std::vector<RealD> los({0.5,1.0,3.0}); // 147/142/146 nbasis 1
// std::vector<RealD> los({1.0,2.0}); // Nbasis 24: 88,86 iterations
// std::vector<RealD> los({2.0,4.0}); // Nbasis 32 == 52, iters
// std::vector<RealD> los({2.0,4.0}); // Nbasis 40 == 36,36 iters
//
// Turns approx 2700 iterations into 340 fine multiplies with Nbasis 40
// Need to measure cost of coarse space.
//
// -- i) Reduce coarse residual -- 0.04
// -- ii) Lanczos on coarse space -- done
// -- iii) Possible 1 hop project and/or preconditioning it - easy - PrecCG it and
// use a limited stencil. Reread BFM code to check on evecs / deflation strategy with prec
//
std::vector<RealD> los({3.0}); // Nbasis 40 == 36,36 iters
// std::vector<int> ords({7,8,10}); // Nbasis 40 == 40,38,36 iters (320,342,396 mults)
std::vector<int> ords({7}); // Nbasis 40 == 40 iters (320 mults)
for(int l=0;l<los.size();l++){
RealD lo = los[l];
for(int o=0;o<ords.size();o++){
ConjugateGradient<CoarseVector> CGsloppy(4.0e-2,maxit,false);
HPDSolver<CoarseVector> HPDSolveSloppy(CoarseOp,CGsloppy,DeflCoarseGuesser);
// ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,10,FineHermOp); // 36 best case
ChebyshevSmoother<LatticeFermionD,HermFineMatrix > Smoother(lo,92,ords[o],FineHermOp); // 311
//////////////////////////////////////////
// Build a HDCG solver
//////////////////////////////////////////
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCG(1.0e-8, 100,
FineHermOp,
Smoother,
HPDSolveSloppy,
HPDSolve,
Aggregates);
TwoLevelADEF2<LatticeFermion,CoarseVector,Subspace>
HDCGdefl(1.0e-8, 100,
FineHermOp,
Smoother,
cADEF1,
HPDSolve,
Aggregates);
result=Zero();
HDCGdefl(src,result);
result=Zero();
HDCG(src,result);
}
}
// Standard CG
result=Zero();
CGfine(HermOpEO, src, result);
Grid_finalize();
return 0;
}