1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

develop pull

This commit is contained in:
Antonin Portelli 2020-04-10 17:19:18 +01:00
commit 4e864e56c9
40 changed files with 1489 additions and 769 deletions

View File

@ -35,6 +35,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/approx/Zolotarev.h> #include <Grid/algorithms/approx/Zolotarev.h>
#include <Grid/algorithms/approx/Chebyshev.h> #include <Grid/algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/JacobiPolynomial.h>
#include <Grid/algorithms/approx/Remez.h> #include <Grid/algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h> #include <Grid/algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/approx/Forecast.h> #include <Grid/algorithms/approx/Forecast.h>

View File

@ -1,3 +1,14 @@
// blockZaxpy in bockPromote - 3s, 5%
// noncoalesced linalg in Preconditionoer ~ 3s 5%
// Lancos tuning or replace 10-20s ~ 25%, open ended
// setup tuning 5s ~ 8%
// -- e.g. ordermin, orderstep tunables.
// MdagM path without norm in LinOp code. few seconds
// Mdir calc blocking kernels
// Fuse kernels in blockMaskedInnerProduct
// preallocate Vectors in Cayley 5D ~ few percent few seconds
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -34,8 +45,35 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class vobj,class CComplex>
inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner1,
Lattice<CComplex> &CoarseInner2,
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask1,
const Lattice<decltype(innerProduct(vobj(),vobj()))> &FineMask2,
const Lattice<vobj> &fineX,
const Lattice<vobj> &fineY)
{
typedef decltype(innerProduct(vobj(),vobj())) dotp;
GridBase *coarse(CoarseInner1.Grid());
GridBase *fine (fineX.Grid());
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
Lattice<dotp> fine_inner_msk(fine);
// Multiply could be fused with innerProduct
// Single block sum kernel could do both masks.
fine_inner = localInnerProduct(fineX,fineY);
mult(fine_inner_msk, fine_inner,FineMask1);
blockSum(CoarseInner1,fine_inner_msk);
mult(fine_inner_msk, fine_inner,FineMask2);
blockSum(CoarseInner2,fine_inner_msk);
}
class Geometry { class Geometry {
// int dimension;
public: public:
int npoint; int npoint;
std::vector<int> directions ; std::vector<int> directions ;
@ -52,10 +90,10 @@ public:
directions.resize(npoint); directions.resize(npoint);
displacements.resize(npoint); displacements.resize(npoint);
for(int d=0;d<_d;d++){ for(int d=0;d<_d;d++){
directions[2*d ] = d+base; directions[d ] = d+base;
directions[2*d+1] = d+base; directions[d+_d] = d+base;
displacements[2*d ] = +1; displacements[d ] = +1;
displacements[2*d+1] = -1; displacements[d+_d]= -1;
} }
directions [2*_d]=0; directions [2*_d]=0;
displacements[2*_d]=0; displacements[2*_d]=0;
@ -63,7 +101,7 @@ public:
//// report back //// report back
std::cout<<GridLogMessage<<"directions :"; std::cout<<GridLogMessage<<"directions :";
for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " "; for(int d=0;d<npoint;d++) std::cout<< directions[d]<< " ";
std::cout <<std::endl; std::cout<<std::endl;
std::cout<<GridLogMessage<<"displacements :"; std::cout<<GridLogMessage<<"displacements :";
for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " "; for(int d=0;d<npoint;d++) std::cout<< displacements[d]<< " ";
std::cout<<std::endl; std::cout<<std::endl;
@ -115,10 +153,10 @@ public:
void Orthogonalise(void){ void Orthogonalise(void){
CoarseScalar InnerProd(CoarseGrid); CoarseScalar InnerProd(CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl; std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,subspace);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,subspace); blockOrthogonalise(InnerProd,subspace);
// std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 2"<<std::endl; // Really have to do twice? Yuck
// blockOrthogonalise(InnerProd,subspace);
// std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl; // std::cout << GridLogMessage <<" Gramm-Schmidt checking orthogonality"<<std::endl;
// CheckOrthogonal(); // CheckOrthogonal();
} }
@ -128,7 +166,7 @@ public:
for(int i=0;i<nbasis;i++){ for(int i=0;i<nbasis;i++){
blockProject(iProj,subspace[i],subspace); blockProject(iProj,subspace[i],subspace);
eProj=Zero(); eProj=Zero();
thread_for(ss, CoarseGrid->oSites(),{ accelerator_for(ss, CoarseGrid->oSites(),1,{
eProj[ss](i)=CComplex(1.0); eProj[ss](i)=CComplex(1.0);
}); });
eProj=eProj - iProj; eProj=eProj - iProj;
@ -146,61 +184,9 @@ public:
void CreateSubspaceRandom(GridParallelRNG &RNG){ void CreateSubspaceRandom(GridParallelRNG &RNG){
for(int i=0;i<nbasis;i++){ for(int i=0;i<nbasis;i++){
random(RNG,subspace[i]); random(RNG,subspace[i]);
std::cout<<GridLogMessage<<" norm subspace["<<i<<"] "<<norm2(subspace[i])<<std::endl;
} }
Orthogonalise();
} }
/*
virtual void CreateSubspaceLanczos(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis)
{
// Run a Lanczos with sloppy convergence
const int Nstop = nn;
const int Nk = nn+20;
const int Np = nn+20;
const int Nm = Nk+Np;
const int MaxIt= 10000;
RealD resid = 1.0e-3;
Chebyshev<FineField> Cheb(0.5,64.0,21);
ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
// IRL.lock = 1;
FineField noise(FineGrid); gaussian(RNG,noise);
FineField tmp(FineGrid);
std::vector<RealD> eval(Nm);
std::vector<FineField> evec(Nm,FineGrid);
int Nconv;
IRL.calc(eval,evec,
noise,
Nconv);
// pull back nn vectors
for(int b=0;b<nn;b++){
subspace[b] = evec[b];
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
hermop.Op(subspace[b],tmp);
std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
noise = tmp - sqrt(eval[b])*subspace[b] ;
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
noise = tmp + eval[b]*subspace[b] ;
std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<" ; [ M - Lambda ]_"<<b<<" vec_"<<b<<" = " <<norm2(noise)<<std::endl;
}
Orthogonalise();
for(int b=0;b<nn;b++){
std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
}
}
*/
virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { virtual void CreateSubspace(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {
RealD scale; RealD scale;
@ -232,54 +218,316 @@ public:
subspace[b] = noise; subspace[b] = noise;
} }
Orthogonalise();
} }
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) { ////////////////////////////////////////////////////////////////////////////////////////////////
// World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
// and this is the best I found
////////////////////////////////////////////////////////////////////////////////////////////////
#if 1
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale; RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
Chebyshev<FineField> Cheb(0.1,64.0,900); // New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
int b =0;
{
// Filter
Chebyshev<FineField> Cheb(lo,hi,orderfilter);
Cheb(hermop,noise,Mn);
// normalise
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Generate a full sequence of Chebyshevs
{
lo=filterlo;
noise=Mn;
FineField T0(FineGrid); T0 = noise;
FineField T1(FineGrid);
FineField T2(FineGrid);
FineField y(FineGrid);
FineField *Tnm = &T0;
FineField *Tn = &T1;
FineField *Tnp = &T2;
// Tn=T1 = (xscale M + mscale)in
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
hermop.HermOp(T0,y);
T1=y*xscale+noise*mscale;
for(int n=2;n<=ordermin+orderstep*(nn-2);n++){
hermop.HermOp(*Tn,y);
auto y_v = y.View();
auto Tn_v = Tn->View();
auto Tnp_v = Tnp->View();
auto Tnm_v = Tnm->View();
const int Nsimd = CComplex::Nsimd();
accelerator_forNB(ss, FineGrid->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});
// Possible more fine grained control is needed than a linear sweep,
// but huge productivity gain if this is simple algorithm and not a tunable
int m =1;
if ( n>=ordermin ) m=n-ordermin;
if ( (m%orderstep)==0 ) {
Mn=*Tnp;
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
}
// Cycle pointers to avoid copies
FineField *swizzle = Tnm;
Tnm =Tn;
Tn =Tnp;
Tnp =swizzle;
}
}
assert(b==nn);
}
#endif
#if 0
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid); FineField noise(FineGrid);
FineField Mn(FineGrid); FineField Mn(FineGrid);
FineField tmp(FineGrid);
FineField combined(FineGrid);
for(int b=0;b<nn;b++){ // New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
gaussian(RNG,noise); // Initial matrix element
scale = std::pow(norm2(noise),-0.5); hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
noise=noise*scale;
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
Cheb(hermop,noise,Mn);
scale = std::pow(norm2(Mn),-0.5);
Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,noise); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(noise)<<std::endl;
int b =0;
#define FILTERb(llo,hhi,oorder) \
{ \
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
Cheb(hermop,noise,Mn); \
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \
subspace[b] = Mn; \
hermop.Op(Mn,tmp); \
std::cout<<GridLogMessage << oorder<< " Cheb filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
b++; \
} }
Orthogonalise(); // JacobiPolynomial<FineField> Cheb(0.002,60.0,1500,-0.5,3.5); \
RealD alpha=-0.8;
RealD beta =-0.8;
#define FILTER(llo,hhi,oorder) \
{ \
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
/* JacobiPolynomial<FineField> Cheb(0.0,60.0,oorder,alpha,beta);*/\
Cheb(hermop,noise,Mn); \
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \
subspace[b] = Mn; \
hermop.Op(Mn,tmp); \
std::cout<<GridLogMessage << oorder<< "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
b++; \
}
#define FILTERc(llo,hhi,oorder) \
{ \
Chebyshev<FineField> Cheb(llo,hhi,oorder); \
Cheb(hermop,noise,combined); \
}
double node = 0.000;
FILTERb(lo,hi,orderfilter);// 0
// FILTERc(node,hi,51);// 0
noise = Mn;
int base = 0;
int mult = 100;
FILTER(node,hi,base+1*mult);
FILTER(node,hi,base+2*mult);
FILTER(node,hi,base+3*mult);
FILTER(node,hi,base+4*mult);
FILTER(node,hi,base+5*mult);
FILTER(node,hi,base+6*mult);
FILTER(node,hi,base+7*mult);
FILTER(node,hi,base+8*mult);
FILTER(node,hi,base+9*mult);
FILTER(node,hi,base+10*mult);
FILTER(node,hi,base+11*mult);
FILTER(node,hi,base+12*mult);
FILTER(node,hi,base+13*mult);
FILTER(node,hi,base+14*mult);
FILTER(node,hi,base+15*mult);
assert(b==nn);
} }
#endif
#if 0
virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
int nn,
double hi,
double lo,
int orderfilter,
int ordermin,
int orderstep,
double filterlo
) {
RealD scale;
FineField noise(FineGrid);
FineField Mn(FineGrid);
FineField tmp(FineGrid);
FineField combined(FineGrid);
// New normalised noise
gaussian(RNG,noise);
scale = std::pow(norm2(noise),-0.5);
noise=noise*scale;
// Initial matrix element
hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
int b =0;
{
Chebyshev<FineField> JacobiPoly(0.005,60.,1500);
// JacobiPolynomial<FineField> JacobiPoly(0.002,60.0,1500,-0.5,3.5);
//JacobiPolynomial<FineField> JacobiPoly(0.03,60.0,500,-0.5,3.5);
// JacobiPolynomial<FineField> JacobiPoly(0.00,60.0,1000,-0.5,3.5);
JacobiPoly(hermop,noise,Mn);
scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
subspace[b] = Mn;
hermop.Op(Mn,tmp);
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
b++;
// scale = std::pow(norm2(tmp),-0.5); tmp=tmp*scale;
// subspace[b] = tmp; b++;
// }
}
#define FILTER(lambda) \
{ \
hermop.HermOp(subspace[0],tmp); \
tmp = tmp - lambda *subspace[0]; \
scale = std::pow(norm2(tmp),-0.5); \
tmp=tmp*scale; \
subspace[b] = tmp; \
hermop.Op(subspace[b],tmp); \
std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl; \
b++; \
}
// scale = std::pow(norm2(tmp),-0.5); tmp=tmp*scale;
// subspace[b] = tmp; b++;
// }
FILTER(2.0e-5);
FILTER(2.0e-4);
FILTER(4.0e-4);
FILTER(8.0e-4);
FILTER(8.0e-4);
FILTER(2.0e-3);
FILTER(3.0e-3);
FILTER(4.0e-3);
FILTER(5.0e-3);
FILTER(6.0e-3);
FILTER(2.5e-3);
FILTER(3.5e-3);
FILTER(4.5e-3);
FILTER(5.5e-3);
FILTER(6.5e-3);
// FILTER(6.0e-5);//6
// FILTER(7.0e-5);//8
// FILTER(8.0e-5);//9
// FILTER(9.0e-5);//3
/*
// FILTER(1.0e-4);//10
FILTER(2.0e-4);//11
// FILTER(3.0e-4);//12
// FILTER(4.0e-4);//13
FILTER(5.0e-4);//14
FILTER(6.0e-3);//4
FILTER(7.0e-4);//1
FILTER(8.0e-4);//7
FILTER(9.0e-4);//15
FILTER(1.0e-3);//2
FILTER(2.0e-3);//2
FILTER(3.0e-3);//2
FILTER(4.0e-3);//2
FILTER(5.0e-3);//2
FILTER(6.0e-3);//2
FILTER(7.0e-3);//2
FILTER(8.0e-3);//2
FILTER(1.0e-2);//2
*/
std::cout << GridLogMessage <<"Jacobi filtering done" <<std::endl;
assert(b==nn);
}
#endif
}; };
// Fine Object == (per site) type of fine field // Fine Object == (per site) type of fine field
// nbasis == number of deflation vectors // nbasis == number of deflation vectors
template<class Fobj,class CComplex,int nbasis> template<class Fobj,class CComplex,int nbasis>
class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > { class CoarsenedMatrix : public SparseMatrixBase<Lattice<iVector<CComplex,nbasis > > > {
public: public:
typedef iVector<CComplex,nbasis > siteVector; typedef iVector<CComplex,nbasis > siteVector;
typedef Lattice<CComplex > CoarseComplexField;
typedef Lattice<siteVector> CoarseVector; typedef Lattice<siteVector> CoarseVector;
typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix; typedef Lattice<iMatrix<CComplex,nbasis > > CoarseMatrix;
typedef iMatrix<CComplex,nbasis > Cobj;
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj > FineField; typedef Lattice<Fobj > FineField;
@ -294,7 +542,6 @@ public:
std::vector<CoarseMatrix> A; std::vector<CoarseMatrix> A;
/////////////////////// ///////////////////////
// Interface // Interface
/////////////////////// ///////////////////////
@ -305,33 +552,71 @@ public:
conformable(_grid,in.Grid()); conformable(_grid,in.Grid());
conformable(in.Grid(),out.Grid()); conformable(in.Grid(),out.Grid());
RealD Nin = norm2(in); // RealD Nin = norm2(in);
SimpleCompressor<siteVector> compressor; SimpleCompressor<siteVector> compressor;
double comms_usec = -usecond();
Stencil.HaloExchange(in,compressor); Stencil.HaloExchange(in,compressor);
comms_usec += usecond();
auto in_v = in.View(); auto in_v = in.View();
auto out_v = out.View(); auto out_v = out.View();
thread_for(ss,Grid()->oSites(),{ typedef LatticeView<Cobj> Aview;
siteVector res = Zero();
siteVector nbr; Vector<Aview> AcceleratorViewContainer;
for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
Aview *Aview_p = & AcceleratorViewContainer[0];
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
GridStopWatch ArithmeticTimer;
int osites=Grid()->oSites();
// double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint;
// double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex);
double usecs =-usecond();
// assert(geom.npoint==9);
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype; int ptype;
StencilEntry *SE; StencilEntry *SE;
int lane=SIMTlane(Nsimd);
for(int point=0;point<geom.npoint;point++){ for(int point=0;point<geom.npoint;point++){
SE=Stencil.GetEntry(ptype,point,ss); SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local&&SE->_permute) { if(SE->_is_local) {
permute(nbr,in_v[SE->_offset],ptype); nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
} else if(SE->_is_local) {
nbr = in_v[SE->_offset];
} else { } else {
nbr = Stencil.CommBuf()[SE->_offset]; nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
}
synchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
} }
auto A_point = A[point].View();
res = res + A_point[ss]*nbr;
} }
vstream(out_v[ss],res); coalescedWrite(out_v[ss](b),res,lane);
}); });
usecs +=usecond();
double nrm_usec=-usecond();
RealD Nout= norm2(out); RealD Nout= norm2(out);
nrm_usec+=usecond();
/*
std::cout << GridLogMessage << "\tNorm " << nrm_usec << " us" <<std::endl;
std::cout << GridLogMessage << "\tHalo " << comms_usec << " us" <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << usecs << " us" <<std::endl;
std::cout << GridLogMessage << "\t mflop/s " << flops/usecs<<std::endl;
std::cout << GridLogMessage << "\t MB/s " << bytes/usecs<<std::endl;
*/
return Nout; return Nout;
}; };
@ -349,25 +634,54 @@ public:
return norm2(out); return norm2(out);
} }
}; };
void MdirComms(const CoarseVector &in)
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ {
conformable(_grid,in.Grid());
conformable(in.Grid(),out.Grid());
SimpleCompressor<siteVector> compressor; SimpleCompressor<siteVector> compressor;
Stencil.HaloExchange(in,compressor); Stencil.HaloExchange(in,compressor);
}
void MdirCalc(const CoarseVector &in, CoarseVector &out, int point)
{
conformable(_grid,in.Grid());
conformable(_grid,out.Grid());
auto point = [dir, disp](){ typedef LatticeView<Cobj> Aview;
if(dir == 0 and disp == 0) Vector<Aview> AcceleratorViewContainer;
return 8; for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer.push_back(A[p].View());
else Aview *Aview_p = & AcceleratorViewContainer[0];
return (4 * dir + 1 - disp) / 2;
}();
auto out_v = out.View(); auto out_v = out.View();
auto in_v = in.View(); auto in_v = in.View();
thread_for(ss,Grid()->oSites(),{
const int Nsimd = CComplex::Nsimd();
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
int ss = sss/nbasis;
int b = sss%nbasis;
calcComplex res = Zero();
calcVector nbr;
int ptype;
StencilEntry *SE;
int lane=SIMTlane(Nsimd);
SE=Stencil.GetEntry(ptype,point,ss);
if(SE->_is_local) {
nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane);
} else {
nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane);
}
synchronise();
for(int bb=0;bb<nbasis;bb++) {
res = res + coalescedRead(Aview_p[point][ss](b,bb))*nbr(bb);
}
coalescedWrite(out_v[ss](b),res,lane);
});
#if 0
accelerator_for(ss,Grid()->oSites(),1,{
siteVector res = Zero(); siteVector res = Zero();
siteVector nbr; siteVector nbr;
int ptype; int ptype;
@ -382,16 +696,65 @@ public:
} else { } else {
nbr = Stencil.CommBuf()[SE->_offset]; nbr = Stencil.CommBuf()[SE->_offset];
} }
synchronise();
auto A_point = A[point].View(); res = res + Aview_p[point][ss]*nbr;
res = res + A_point[ss]*nbr;
vstream(out_v[ss],res); out_v[ss]=res;
}); });
#endif
}
void MdirAll(const CoarseVector &in,std::vector<CoarseVector> &out)
{
this->MdirComms(in);
int ndir=geom.npoint-1;
if ((out.size()!=ndir)&&(out.size()!=ndir+1)) {
std::cout <<"MdirAll out size "<< out.size()<<std::endl;
std::cout <<"MdirAll ndir "<< ndir<<std::endl;
assert(0);
}
for(int p=0;p<ndir;p++){
MdirCalc(in,out[p],p);
}
};
void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
this->MdirComms(in);
int ndim = in.Grid()->Nd();
//////////////
// 4D action like wilson
// 0+ => 0
// 0- => 1
// 1+ => 2
// 1- => 3
// etc..
//////////////
// 5D action like DWF
// 1+ => 0
// 1- => 1
// 2+ => 2
// 2- => 3
// etc..
auto point = [dir, disp, ndim](){
if(dir == 0 and disp == 0)
return 8;
else if ( ndim==4 ) {
return (4 * dir + 1 - disp) / 2;
} else {
return (4 * (dir-1) + 1 - disp) / 2;
}
}();
MdirCalc(in,out,point);
}; };
void Mdiag(const CoarseVector &in, CoarseVector &out){ void Mdiag(const CoarseVector &in, CoarseVector &out)
Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil {
int point=geom.npoint-1;
MdirCalc(in, out, point); // No comms
}; };
@ -401,25 +764,44 @@ public:
geom(CoarseGrid._ndimension), geom(CoarseGrid._ndimension),
hermitian(hermitian_), hermitian(hermitian_),
Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
A(geom.npoint,&CoarseGrid) A(geom.npoint,&CoarseGrid)
{ {
}; };
void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
Aggregation<Fobj,CComplex,nbasis> & Subspace){ Aggregation<Fobj,CComplex,nbasis> & Subspace)
{
typedef Lattice<typename Fobj::tensor_reduced> FineComplexField;
typedef typename Fobj::scalar_type scalar_type;
FineField iblock(FineGrid); // contributions from within this block FineComplexField one(FineGrid); one=scalar_type(1.0,0.0);
FineField oblock(FineGrid); // contributions from outwith this block FineComplexField zero(FineGrid); zero=scalar_type(0.0,0.0);
std::vector<FineComplexField> masks(geom.npoint,FineGrid);
FineComplexField imask(FineGrid); // contributions from within this block
FineComplexField omask(FineGrid); // contributions from outwith this block
FineComplexField evenmask(FineGrid);
FineComplexField oddmask(FineGrid);
FineField phi(FineGrid); FineField phi(FineGrid);
FineField tmp(FineGrid); FineField tmp(FineGrid);
FineField zz(FineGrid); zz=Zero(); FineField zz(FineGrid); zz=Zero();
FineField Mphi(FineGrid); FineField Mphi(FineGrid);
FineField Mphie(FineGrid);
FineField Mphio(FineGrid);
std::vector<FineField> Mphi_p(geom.npoint,FineGrid);
Lattice<iScalar<vInteger> > coor(FineGrid); Lattice<iScalar<vInteger> > coor (FineGrid);
Lattice<iScalar<vInteger> > bcoor(FineGrid);
Lattice<iScalar<vInteger> > bcb (FineGrid);
CoarseVector iProj(Grid()); CoarseVector iProj(Grid());
CoarseVector oProj(Grid()); CoarseVector oProj(Grid());
CoarseVector SelfProj(Grid());
CoarseComplexField iZProj(Grid());
CoarseComplexField oZProj(Grid());
CoarseScalar InnerProd(Grid()); CoarseScalar InnerProd(Grid());
// Orthogonalise the subblocks over the basis // Orthogonalise the subblocks over the basis
@ -428,69 +810,114 @@ public:
// Compute the matrix elements of linop between this orthonormal // Compute the matrix elements of linop between this orthonormal
// set of vectors. // set of vectors.
int self_stencil=-1; int self_stencil=-1;
for(int p=0;p<geom.npoint;p++){ for(int p=0;p<geom.npoint;p++)
{
int dir = geom.directions[p];
int disp = geom.displacements[p];
A[p]=Zero(); A[p]=Zero();
if( geom.displacements[p]==0){ if( geom.displacements[p]==0){
self_stencil=p; self_stencil=p;
} }
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]);
LatticeCoordinate(coor,dir);
///////////////////////////////////////////////////////
// Work out even and odd block checkerboarding for fast diagonal term
///////////////////////////////////////////////////////
if ( disp==1 ) {
bcb = bcb + div(coor,block);
}
if ( disp==0 ) {
masks[p]= Zero();
} else if ( disp==1 ) {
masks[p] = where(mod(coor,block)==(block-1),one,zero);
} else if ( disp==-1 ) {
masks[p] = where(mod(coor,block)==(Integer)0,one,zero);
}
} }
evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
oddmask = one-evenmask;
assert(self_stencil!=-1); assert(self_stencil!=-1);
for(int i=0;i<nbasis;i++){ for(int i=0;i<nbasis;i++){
phi=Subspace.subspace[i]; phi=Subspace.subspace[i];
std::cout<<GridLogMessage<<"("<<i<<").."<<std::endl; // std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
linop.OpDirAll(phi,Mphi_p);
linop.OpDiag (phi,Mphi_p[geom.npoint-1]);
for(int p=0;p<geom.npoint;p++){ for(int p=0;p<geom.npoint;p++){
Mphi = Mphi_p[p];
int dir = geom.directions[p]; int dir = geom.directions[p];
int disp = geom.displacements[p]; int disp = geom.displacements[p];
Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); if ( (disp==-1) || (!hermitian ) ) {
LatticeCoordinate(coor,dir); ////////////////////////////////////////////////////////////////////////
// Pick out contributions coming from this cell and neighbour cell
////////////////////////////////////////////////////////////////////////
omask = masks[p];
imask = one-omask;
if ( disp==0 ){
linop.OpDiag(phi,Mphi);
}
else {
linop.OpDir(phi,Mphi,dir,disp);
}
////////////////////////////////////////////////////////////////////////
// Pick out contributions coming from this cell and neighbour cell
////////////////////////////////////////////////////////////////////////
if ( disp==0 ) {
iblock = Mphi;
oblock = Zero();
} else if ( disp==1 ) {
oblock = where(mod(coor,block)==(block-1),Mphi,zz);
iblock = where(mod(coor,block)!=(block-1),Mphi,zz);
} else if ( disp==-1 ) {
oblock = where(mod(coor,block)==(Integer)0,Mphi,zz);
iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz);
} else {
assert(0);
}
Subspace.ProjectToSubspace(iProj,iblock);
Subspace.ProjectToSubspace(oProj,oblock);
// blockProject(iProj,iblock,Subspace.subspace);
// blockProject(oProj,oblock,Subspace.subspace);
auto iProj_v = iProj.View() ;
auto oProj_v = oProj.View() ;
auto A_p = A[p].View();
auto A_self = A[self_stencil].View();
thread_for(ss, Grid()->oSites(),{
for(int j=0;j<nbasis;j++){ for(int j=0;j<nbasis;j++){
if( disp!= 0 ) {
A_p[ss](j,i) = oProj_v[ss](j); blockMaskedInnerProduct(iZProj,oZProj,imask,omask,Subspace.subspace[j],Mphi);
}
A_self[ss](j,i) = A_self[ss](j,i) + iProj_v[ss](j); auto iZProj_v = iZProj.View() ;
auto oZProj_v = oZProj.View() ;
auto A_p = A[p].View();
auto A_self = A[self_stencil].View();
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
}
}
}
///////////////////////////////////////////
// Faster alternate self coupling.. use hermiticity to save 2x
///////////////////////////////////////////
{
mult(tmp,phi,evenmask); linop.Op(tmp,Mphie);
mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio);
// tmp = Mphie*evenmask + Mphio*oddmask;
{
auto tmp_ = tmp.View();
auto evenmask_ = evenmask.View();
auto oddmask_ = oddmask.View();
auto Mphie_ = Mphie.View();
auto Mphio_ = Mphio.View();
accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{
coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss));
});
}
blockProject(SelfProj,tmp,Subspace.subspace);
auto SelfProj_ = SelfProj.View();
auto A_self = A[self_stencil].View();
accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{
for(int j=0;j<nbasis;j++){
coalescedWrite(A_self[ss](j,i), SelfProj_(ss)(j));
} }
}); });
} }
} }
if(hermitian) {
std::cout << GridLogMessage << " ForceHermitian "<<std::endl;
ForceHermitian();
}
// AssertHermitian();
// ForceDiagonal();
}
#if 0 #if 0
/////////////////////////// ///////////////////////////
@ -513,17 +940,26 @@ public:
std::cout<<GridLogMessage<< iProj <<std::endl; std::cout<<GridLogMessage<< iProj <<std::endl;
std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl; std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
#endif #endif
// ForceHermitian();
// AssertHermitian();
// ForceDiagonal();
}
void ForceHermitian(void) { void ForceHermitian(void) {
for(int d=0;d<4;d++){ CoarseMatrix Diff (Grid());
int dd=d+1; for(int p=0;p<geom.npoint;p++){
A[2*d] = adj(Cshift(A[2*d+1],dd,1)); int dir = geom.directions[p];
int disp = geom.displacements[p];
if(disp==-1) {
// Find the opposite link
for(int pp=0;pp<geom.npoint;pp++){
int dirp = geom.directions[pp];
int dispp = geom.displacements[pp];
if ( (dirp==dir) && (dispp==1) ){
// Diff = adj(Cshift(A[p],dir,1)) - A[pp];
// std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
A[pp] = adj(Cshift(A[p],dir,1));
}
}
}
} }
// A[8] = 0.5*(A[8] + adj(A[8]));
} }
void AssertHermitian(void) { void AssertHermitian(void) {
CoarseMatrix AA (Grid()); CoarseMatrix AA (Grid());

View File

@ -47,6 +47,7 @@ public:
// Support for coarsening to a multigrid // Support for coarsening to a multigrid
virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base
virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base
virtual void OpDirAll (const Field &in, std::vector<Field> &out) = 0; // Abstract base
virtual void Op (const Field &in, Field &out) = 0; // Abstract base virtual void Op (const Field &in, Field &out) = 0; // Abstract base
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
@ -83,6 +84,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { void OpDir (const Field &in, Field &out,int dir,int disp) {
_Mat.Mdir(in,out,dir,disp); _Mat.Mdir(in,out,dir,disp);
} }
void OpDirAll (const Field &in, std::vector<Field> &out){
_Mat.MdirAll(in,out);
};
void Op (const Field &in, Field &out){ void Op (const Field &in, Field &out){
_Mat.M(in,out); _Mat.M(in,out);
} }
@ -93,8 +97,7 @@ public:
_Mat.MdagM(in,out,n1,n2); _Mat.MdagM(in,out,n1,n2);
} }
void HermOp(const Field &in, Field &out){ void HermOp(const Field &in, Field &out){
RealD n1,n2; _Mat.MdagM(in,out);
HermOpAndNorm(in,out,n1,n2);
} }
}; };
@ -116,6 +119,9 @@ public:
_Mat.Mdir(in,out,dir,disp); _Mat.Mdir(in,out,dir,disp);
assert(0); assert(0);
} }
void OpDirAll (const Field &in, std::vector<Field> &out){
assert(0);
};
void Op (const Field &in, Field &out){ void Op (const Field &in, Field &out){
_Mat.M(in,out); _Mat.M(in,out);
assert(0); assert(0);
@ -154,6 +160,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { void OpDir (const Field &in, Field &out,int dir,int disp) {
_Mat.Mdir(in,out,dir,disp); _Mat.Mdir(in,out,dir,disp);
} }
void OpDirAll (const Field &in, std::vector<Field> &out){
_Mat.MdirAll(in,out);
};
void Op (const Field &in, Field &out){ void Op (const Field &in, Field &out){
_Mat.M(in,out); _Mat.M(in,out);
} }
@ -162,7 +171,6 @@ public:
} }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
_Mat.M(in,out); _Mat.M(in,out);
ComplexD dot= innerProduct(in,out); n1=real(dot); ComplexD dot= innerProduct(in,out); n1=real(dot);
n2=norm2(out); n2=norm2(out);
} }
@ -183,6 +191,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { void OpDir (const Field &in, Field &out,int dir,int disp) {
_Mat.Mdir(in,out,dir,disp); _Mat.Mdir(in,out,dir,disp);
} }
void OpDirAll (const Field &in, std::vector<Field> &out){
_Mat.MdirAll(in,out);
};
void Op (const Field &in, Field &out){ void Op (const Field &in, Field &out){
_Mat.M(in,out); _Mat.M(in,out);
} }
@ -234,6 +245,9 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0); assert(0);
} }
void OpDirAll (const Field &in, std::vector<Field> &out){
assert(0);
};
}; };
template<class Matrix,class Field> template<class Matrix,class Field>
class SchurDiagMooeeOperator : public SchurOperatorBase<Field> { class SchurDiagMooeeOperator : public SchurOperatorBase<Field> {

View File

@ -45,8 +45,13 @@ public:
ni=M(in,tmp); ni=M(in,tmp);
no=Mdag(tmp,out); no=Mdag(tmp,out);
} }
virtual void MdagM(const Field &in, Field &out) {
RealD ni, no;
MdagM(in,out,ni,no);
}
virtual void Mdiag (const Field &in, Field &out)=0; virtual void Mdiag (const Field &in, Field &out)=0;
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
}; };
///////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////
@ -56,12 +61,12 @@ template<class Field> class CheckerBoardedSparseMatrixBase : public SparseMatrix
public: public:
virtual GridBase *RedBlackGrid(void)=0; virtual GridBase *RedBlackGrid(void)=0;
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// Query the even even properties to make algorithmic decisions // Query the even even properties to make algorithmic decisions
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
virtual RealD Mass(void) { return 0.0; }; virtual RealD Mass(void) { return 0.0; };
virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden
virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better
// half checkerboard operaions // half checkerboard operaions
virtual void Meooe (const Field &in, Field &out)=0; virtual void Meooe (const Field &in, Field &out)=0;

View File

@ -95,6 +95,24 @@ public:
Coeffs[order-1] = 1.; Coeffs[order-1] = 1.;
}; };
// PB - more efficient low pass drops high modes above the low as 1/x uses all Chebyshev's.
// Similar kick effect below the threshold as Lanczos filter approach
void InitLowPass(RealD _lo,RealD _hi,int _order)
{
lo=_lo;
hi=_hi;
order=_order;
if(order < 2) exit(-1);
Coeffs.resize(order);
for(int j=0;j<order;j++){
RealD k=(order-1.0);
RealD s=std::cos( j*M_PI*(k+0.5)/order );
Coeffs[j] = s * 2.0/order;
}
};
void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD)) void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD))
{ {
lo=_lo; lo=_lo;
@ -234,20 +252,20 @@ public:
RealD xscale = 2.0/(hi-lo); RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo); RealD mscale = -(hi+lo)/(hi-lo);
Linop.HermOp(T0,y); Linop.HermOp(T0,y);
T1=y*xscale+in*mscale; axpby(T1,xscale,mscale,y,in);
// sum = .5 c[0] T0 + c[1] T1 // sum = .5 c[0] T0 + c[1] T1
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1; // out = ()*T0 + Coeffs[1]*T1;
axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
for(int n=2;n<order;n++){ for(int n=2;n<order;n++){
Linop.HermOp(*Tn,y); Linop.HermOp(*Tn,y);
// y=xscale*y+mscale*(*Tn);
y=xscale*y+mscale*(*Tn); // *Tnp=2.0*y-(*Tnm);
// out=out+Coeffs[n]* (*Tnp);
*Tnp=2.0*y-(*Tnm); axpby(y,xscale,mscale,y,(*Tn));
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
out=out+Coeffs[n]* (*Tnp); axpy(out,Coeffs[n],*Tnp,out);
// Cycle pointers to avoid copies // Cycle pointers to avoid copies
Field *swizzle = Tnm; Field *swizzle = Tnm;
Tnm =Tn; Tnm =Tn;

View File

@ -0,0 +1,129 @@
#ifndef GRID_JACOBIPOLYNOMIAL_H
#define GRID_JACOBIPOLYNOMIAL_H
#include <Grid/algorithms/LinearOperator.h>
NAMESPACE_BEGIN(Grid);
template<class Field>
class JacobiPolynomial : public OperatorFunction<Field> {
private:
using OperatorFunction<Field>::operator();
int order;
RealD hi;
RealD lo;
RealD alpha;
RealD beta;
public:
void csv(std::ostream &out){
csv(out,lo,hi);
}
void csv(std::ostream &out,RealD llo,RealD hhi){
RealD diff = hhi-llo;
RealD delta = diff*1.0e-5;
for (RealD x=llo-delta; x<=hhi; x+=delta) {
RealD f = approx(x);
out<< x<<" "<<f <<std::endl;
}
return;
}
JacobiPolynomial(){};
JacobiPolynomial(RealD _lo,RealD _hi,int _order,RealD _alpha, RealD _beta)
{
lo=_lo;
hi=_hi;
alpha=_alpha;
beta=_beta;
order=_order;
};
RealD approx(RealD x) // Convenience for plotting the approximation
{
RealD Tn;
RealD Tnm;
RealD Tnp;
RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
RealD T0=1.0;
RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
Tn =T1;
Tnm=T0;
for(int n=2;n<=order;n++){
RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
Tnm=Tn;
Tn =Tnp;
}
return Tnp;
};
// Implement the required interface
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in.Grid();
int vol=grid->gSites();
Field T0(grid);
Field T1(grid);
Field T2(grid);
Field y(grid);
Field *Tnm = &T0;
Field *Tn = &T1;
Field *Tnp = &T2;
// RealD T0=1.0;
T0=in;
// RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo));
// = x * 2/(hi-lo) - (hi+lo)/(hi-lo)
Linop.HermOp(T0,y);
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
Linop.HermOp(T0,y);
y=y*xscale+in*mscale;
// RealD T1=(alpha-beta)*0.5+(alpha+beta+2.0)*0.5*y;
RealD halfAmB = (alpha-beta)*0.5;
RealD halfApBp2= (alpha+beta+2.0)*0.5;
T1 = halfAmB * in + halfApBp2*y;
for(int n=2;n<=order;n++){
Linop.HermOp(*Tn,y);
y=xscale*y+mscale*(*Tn);
RealD cnp = 2.0*n*(n+alpha+beta)*(2.0*n-2.0+alpha+beta);
RealD cny = (2.0*n-2.0+alpha+beta)*(2.0*n-1.0+alpha+beta)*(2.0*n+alpha+beta);
RealD cn1 = (2.0*n+alpha+beta-1.0)*(alpha*alpha-beta*beta);
RealD cnm = - 2.0*(n+alpha-1.0)*(n+beta-1.0)*(2.0*n+alpha+beta);
// Tnp= ( cny * y *Tn + cn1 * Tn + cnm * Tnm )/ cnp;
cny=cny/cnp;
cn1=cn1/cnp;
cn1=cn1/cnp;
cnm=cnm/cnp;
*Tnp=cny*y + cn1 *(*Tn) + cnm * (*Tnm);
// Cycle pointers to avoid copies
Field *swizzle = Tnm;
Tnm =Tn;
Tn =Tnp;
Tnp =swizzle;
}
out=*Tnp;
}
};
NAMESPACE_END(Grid);
#endif

View File

@ -72,7 +72,6 @@ public:
RealD guess = norm2(psi); RealD guess = norm2(psi);
assert(std::isnan(guess) == 0); assert(std::isnan(guess) == 0);
Linop.HermOpAndNorm(psi, mmp, d, b); Linop.HermOpAndNorm(psi, mmp, d, b);
r = src - mmp; r = src - mmp;
@ -154,18 +153,18 @@ public:
RealD resnorm = std::sqrt(norm2(p)); RealD resnorm = std::sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm; RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<<std::endl; << "\tComputed residual " << std::sqrt(cp / ssq)
std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl; << "\tTrue residual " << true_residual
std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl; << "\tTarget " << Tolerance << std::endl;
std::cout << GridLogMessage << "Time breakdown "<<std::endl; std::cout << GridLogIterative << "Time breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl; std::cout << GridLogIterative << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl; std::cout << GridLogIterative << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl; std::cout << GridLogIterative << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInner " << InnerTimer.Elapsed() <<std::endl; std::cout << GridLogIterative << "\tInner " << InnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl; std::cout << GridLogIterative << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl; std::cout << GridLogIterative << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

View File

@ -43,6 +43,11 @@ NAMESPACE_BEGIN(Grid);
template<class Field> template<class Field>
void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k) void basisOrthogonalize(std::vector<Field> &basis,Field &w,int k)
{ {
// If assume basis[j] are already orthonormal,
// can take all inner products in parallel saving 2x bandwidth
// Save 3x bandwidth on the second line of loop.
// perhaps 2.5x speed up.
// 2x overall in Multigrid Lanczos
for(int j=0; j<k; ++j){ for(int j=0; j<k; ++j){
auto ip = innerProduct(basis[j],w); auto ip = innerProduct(basis[j],w);
w = w - ip*basis[j]; w = w - ip*basis[j];
@ -282,7 +287,7 @@ public:
RealD _eresid, // resid in lmdue deficit RealD _eresid, // resid in lmdue deficit
int _MaxIter, // Max iterations int _MaxIter, // Max iterations
RealD _betastp=0.0, // if beta(k) < betastp: converged RealD _betastp=0.0, // if beta(k) < betastp: converged
int _MinRestart=1, int _orth_period = 1, int _MinRestart=0, int _orth_period = 1,
IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) : IRLdiagonalisation _diagonalisation= IRLdiagonaliseWithEigen) :
SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester), SimpleTester(HermOp), _PolyOp(PolyOp), _HermOp(HermOp), _Tester(Tester),
Nstop(_Nstop) , Nk(_Nk), Nm(_Nm), Nstop(_Nstop) , Nk(_Nk), Nm(_Nm),
@ -347,7 +352,7 @@ until convergence
GridBase *grid = src.Grid(); GridBase *grid = src.Grid();
assert(grid == evec[0].Grid()); assert(grid == evec[0].Grid());
GridLogIRL.TimingMode(1); // GridLogIRL.TimingMode(1);
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl; std::cout << GridLogIRL <<" ImplicitlyRestartedLanczos::calc() starting iteration 0 / "<< MaxIter<< std::endl;
std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; std::cout << GridLogIRL <<"**************************************************************************"<< std::endl;
@ -577,11 +582,11 @@ until convergence
/* Saad PP. 195 /* Saad PP. 195
1. Choose an initial vector v1 of 2-norm unity. Set β1 0, v0 0 1. Choose an initial vector v1 of 2-norm unity. Set β1 0, v0 0
2. For k = 1,2,...,m Do: 2. For k = 1,2,...,m Do:
3. wk:=Avkβkv_{k1} 3. wk:=Avk - b_k v_{k-1}
4. αk:=(wk,vk) // 4. ak:=(wk,vk) //
5. wk:=wkαkvk // wk orthog vk 5. wk:=wk-akvk // wk orthog vk
6. βk+1 := wk2. If βk+1 = 0 then Stop 6. bk+1 := ||wk||_2. If b_k+1 = 0 then Stop
7. vk+1 := wk/βk+1 7. vk+1 := wk/b_k+1
8. EndDo 8. EndDo
*/ */
void step(std::vector<RealD>& lmd, void step(std::vector<RealD>& lmd,
@ -589,6 +594,7 @@ until convergence
std::vector<Field>& evec, std::vector<Field>& evec,
Field& w,int Nm,int k) Field& w,int Nm,int k)
{ {
std::cout<<GridLogIRL << "Lanczos step " <<k<<std::endl;
const RealD tiny = 1.0e-20; const RealD tiny = 1.0e-20;
assert( k< Nm ); assert( k< Nm );
@ -600,20 +606,20 @@ until convergence
if(k>0) w -= lme[k-1] * evec[k-1]; if(k>0) w -= lme[k-1] * evec[k-1];
ComplexD zalph = innerProduct(evec_k,w); // 4. αk:=(wk,vk) ComplexD zalph = innerProduct(evec_k,w);
RealD alph = real(zalph); RealD alph = real(zalph);
w = w - alph * evec_k;// 5. wk:=wkαkvk w = w - alph * evec_k;
RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop RealD beta = normalise(w);
// 7. vk+1 := wk/βk+1
lmd[k] = alph; lmd[k] = alph;
lme[k] = beta; lme[k] = beta;
if (k>0 && k % orth_period == 0) { if ( (k>0) && ( (k % orth_period) == 0 )) {
std::cout<<GridLogIRL << "Orthogonalising " <<k<<std::endl;
orthogonalize(w,evec,k); // orthonormalise orthogonalize(w,evec,k); // orthonormalise
std::cout<<GridLogIRL << "Orthogonalised " <<std::endl; std::cout<<GridLogIRL << "Orthogonalised " <<k<<std::endl;
} }
if(k < Nm-1) evec[k+1] = w; if(k < Nm-1) evec[k+1] = w;
@ -621,6 +627,8 @@ until convergence
std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl; std::cout<<GridLogIRL << "alpha[" << k << "] = " << zalph << " beta[" << k << "] = "<<beta<<std::endl;
if ( beta < tiny ) if ( beta < tiny )
std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl; std::cout<<GridLogIRL << " beta is tiny "<<beta<<std::endl;
std::cout<<GridLogIRL << "Lanczos step complete " <<k<<std::endl;
} }
void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme, void diagonalize_Eigen(std::vector<RealD>& lmd, std::vector<RealD>& lme,

View File

@ -33,25 +33,29 @@ NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
// Take a matrix and form an NE solver calling a Herm solver // Take a matrix and form an NE solver calling a Herm solver
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class NormalEquations : public OperatorFunction<Field>{ template<class Field> class NormalEquations {
private: private:
SparseMatrixBase<Field> & _Matrix; SparseMatrixBase<Field> & _Matrix;
OperatorFunction<Field> & _HermitianSolver; OperatorFunction<Field> & _HermitianSolver;
LinearFunction<Field> & _Guess;
public: public:
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Wrap the usual normal equations trick // Wrap the usual normal equations trick
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver) NormalEquations(SparseMatrixBase<Field> &Matrix, OperatorFunction<Field> &HermitianSolver,
: _Matrix(Matrix), _HermitianSolver(HermitianSolver) {}; LinearFunction<Field> &Guess)
: _Matrix(Matrix), _HermitianSolver(HermitianSolver), _Guess(Guess) {};
void operator() (const Field &in, Field &out){ void operator() (const Field &in, Field &out){
Field src(in.Grid()); Field src(in.Grid());
Field tmp(in.Grid());
MdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(_Matrix);
_Matrix.Mdag(in,src); _Matrix.Mdag(in,src);
_HermitianSolver(src,out); // Mdag M out = Mdag in _Guess(src,out);
_HermitianSolver(MdagMOp,src,out); // Mdag M out = Mdag in
} }
}; };

View File

@ -30,12 +30,12 @@ template<class Field> class PowerMethod
RealD vden = norm2(src_n); RealD vden = norm2(src_n);
RealD na = vnum/vden; RealD na = vnum/vden;
if ( (fabs(evalMaxApprox/na - 1.0) < 0.01) || (i==_MAX_ITER_EST_-1) ) { if ( (fabs(evalMaxApprox/na - 1.0) < 0.001) || (i==_MAX_ITER_EST_-1) ) {
evalMaxApprox = na; evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
return evalMaxApprox; return evalMaxApprox;
} }
evalMaxApprox = na; evalMaxApprox = na;
std::cout << GridLogMessage << " Approximation of largest eigenvalue: " << evalMaxApprox << std::endl;
src_n = tmp; src_n = tmp;
} }
assert(0); assert(0);

View File

@ -38,10 +38,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#define GCRLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level<<" "
template<class Field> template<class Field>
class PrecGeneralisedConjugateResidual : public OperatorFunction<Field> { class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
public: public:
using OperatorFunction<Field>::operator();
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
@ -49,23 +50,29 @@ public:
int mmax; int mmax;
int nstep; int nstep;
int steps; int steps;
int level;
GridStopWatch PrecTimer; GridStopWatch PrecTimer;
GridStopWatch MatTimer; GridStopWatch MatTimer;
GridStopWatch LinalgTimer; GridStopWatch LinalgTimer;
LinearFunction<Field> &Preconditioner; LinearFunction<Field> &Preconditioner;
LinearOperatorBase<Field> &Linop;
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction<Field> &Prec,int _mmax,int _nstep) : void Level(int lv) { level=lv; };
PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearOperatorBase<Field> &_Linop,LinearFunction<Field> &Prec,int _mmax,int _nstep) :
Tolerance(tol), Tolerance(tol),
MaxIterations(maxit), MaxIterations(maxit),
Linop(_Linop),
Preconditioner(Prec), Preconditioner(Prec),
mmax(_mmax), mmax(_mmax),
nstep(_nstep) nstep(_nstep)
{ {
level=1;
verbose=1; verbose=1;
}; };
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){ void operator() (const Field &src, Field &psi){
psi=Zero(); psi=Zero();
RealD cp, ssq,rsq; RealD cp, ssq,rsq;
@ -84,9 +91,9 @@ public:
steps=0; steps=0;
for(int k=0;k<MaxIterations;k++){ for(int k=0;k<MaxIterations;k++){
cp=GCRnStep(Linop,src,psi,rsq); cp=GCRnStep(src,psi,rsq);
std::cout<<GridLogMessage<<"VPGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl; GCRLogLevel <<"PGCR("<<mmax<<","<<nstep<<") "<< steps <<" steps cp = "<<cp<<" target "<<rsq <<std::endl;
if(cp<rsq) { if(cp<rsq) {
@ -95,24 +102,26 @@ public:
Linop.HermOp(psi,r); Linop.HermOp(psi,r);
axpy(r,-1.0,src,r); axpy(r,-1.0,src,r);
RealD tr = norm2(r); RealD tr = norm2(r);
std::cout<<GridLogMessage<<"PrecGeneralisedConjugateResidual: Converged on iteration " <<steps GCRLogLevel<<"PGCR: Converged on iteration " <<steps
<< " computed residual "<<sqrt(cp/ssq) << " computed residual "<<sqrt(cp/ssq)
<< " true residual " <<sqrt(tr/ssq) << " true residual " <<sqrt(tr/ssq)
<< " target " <<Tolerance <<std::endl; << " target " <<Tolerance <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl; GCRLogLevel<<"PGCR Time elapsed: Total "<< SolverTimer.Elapsed() <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl; /*
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl; GCRLogLevel<<"PGCR Time elapsed: Precon "<< PrecTimer.Elapsed() <<std::endl;
std::cout<<GridLogMessage<<"VPGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl; GCRLogLevel<<"PGCR Time elapsed: Matrix "<< MatTimer.Elapsed() <<std::endl;
GCRLogLevel<<"PGCR Time elapsed: Linalg "<< LinalgTimer.Elapsed() <<std::endl;
*/
return; return;
} }
} }
std::cout<<GridLogMessage<<"Variable Preconditioned GCR did not converge"<<std::endl; GCRLogLevel<<"Variable Preconditioned GCR did not converge"<<std::endl;
assert(0); assert(0);
} }
RealD GCRnStep(LinearOperatorBase<Field> &Linop,const Field &src, Field &psi,RealD rsq){ RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
RealD cp; RealD cp;
RealD a, b; RealD a, b;
@ -134,9 +143,7 @@ public:
std::vector<Field> p(mmax,grid); std::vector<Field> p(mmax,grid);
std::vector<RealD> qq(mmax); std::vector<RealD> qq(mmax);
std::cout<<GridLogIterative<< " ************** "<< std::endl; GCRLogLevel<< "PGCR nStep("<<nstep<<")"<<std::endl;
std::cout<<GridLogIterative<< " GCRnStep("<<nstep<<")"<<std::endl;
std::cout<<GridLogIterative<< " ************** "<< std::endl;
////////////////////////////////// //////////////////////////////////
// initial guess x0 is taken as nonzero. // initial guess x0 is taken as nonzero.
@ -150,35 +157,15 @@ public:
LinalgTimer.Start(); LinalgTimer.Start();
r=src-Az; r=src-Az;
LinalgTimer.Stop(); LinalgTimer.Stop();
std::cout<<GridLogIterative<< " GCRnStep true residual r = src - A psi "<<norm2(r) <<std::endl; GCRLogLevel<< "PGCR true residual r = src - A psi "<<norm2(r) <<std::endl;
///////////////////// /////////////////////
// p = Prec(r) // p = Prec(r)
///////////////////// /////////////////////
std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
PrecTimer.Start(); PrecTimer.Start();
Preconditioner(r,z); Preconditioner(r,z);
PrecTimer.Stop(); PrecTimer.Stop();
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
// MatTimer.Start();
// Linop.HermOp(z,tmp);
// MatTimer.Stop();
// LinalgTimer.Start();
// ttmp=tmp;
// tmp=tmp-r;
// LinalgTimer.Stop();
/*
std::cout<<GridLogMessage<<r<<std::endl;
std::cout<<GridLogMessage<<z<<std::endl;
std::cout<<GridLogMessage<<ttmp<<std::endl;
std::cout<<GridLogMessage<<tmp<<std::endl;
*/
MatTimer.Start(); MatTimer.Start();
Linop.HermOpAndNorm(z,Az,zAz,zAAz); Linop.HermOpAndNorm(z,Az,zAz,zAAz);
@ -190,7 +177,6 @@ public:
p[0]= z; p[0]= z;
q[0]= Az; q[0]= Az;
qq[0]= zAAz; qq[0]= zAAz;
std::cout<<GridLogIterative<< " GCRnStep p0=z, q0 = A p0 " <<std::endl;
cp =norm2(r); cp =norm2(r);
LinalgTimer.Stop(); LinalgTimer.Stop();
@ -212,20 +198,16 @@ public:
cp = axpy_norm(r,-a,q[peri_k],r); cp = axpy_norm(r,-a,q[peri_k],r);
LinalgTimer.Stop(); LinalgTimer.Stop();
std::cout<<GridLogMessage<< " VPGCR_step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl; GCRLogLevel<< "PGCR step["<<steps<<"] resid " << cp << " target " <<rsq<<std::endl;
if((k==nstep-1)||(cp<rsq)){ if((k==nstep-1)||(cp<rsq)){
return cp; return cp;
} }
std::cout<<GridLogIterative<< " GCRnStep apply preconditioner z= M^-1 r "<< std::endl;
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
PrecTimer.Start(); PrecTimer.Start();
Preconditioner(r,z);// solve Az = r Preconditioner(r,z);// solve Az = r
PrecTimer.Stop(); PrecTimer.Stop();
std::cout<<GridLogIterative<< " --------------------------------------- "<< std::endl;
std::cout<<GridLogIterative<< " GCRnStep called Preconditioner z "<< norm2(z) <<std::endl;
MatTimer.Start(); MatTimer.Start();
Linop.HermOpAndNorm(z,Az,zAz,zAAz); Linop.HermOpAndNorm(z,Az,zAz,zAAz);

View File

@ -6,6 +6,12 @@ NAMESPACE_BEGIN(Grid);
MemoryStats *MemoryProfiler::stats = nullptr; MemoryStats *MemoryProfiler::stats = nullptr;
bool MemoryProfiler::debug = false; bool MemoryProfiler::debug = false;
#ifdef GRID_NVCC
#define SMALL_LIMIT (0)
#else
#define SMALL_LIMIT (4096)
#endif
#ifdef POINTER_CACHE #ifdef POINTER_CACHE
int PointerCache::victim; int PointerCache::victim;
@ -13,7 +19,7 @@ PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
void *PointerCache::Insert(void *ptr,size_t bytes) { void *PointerCache::Insert(void *ptr,size_t bytes) {
if (bytes < 4096 ) return ptr; if (bytes < SMALL_LIMIT ) return ptr;
#ifdef GRID_OMP #ifdef GRID_OMP
assert(omp_in_parallel()==0); assert(omp_in_parallel()==0);
@ -50,7 +56,7 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
void *PointerCache::Lookup(size_t bytes) { void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL; if (bytes < SMALL_LIMIT ) return NULL;
#ifdef GRID_OMP #ifdef GRID_OMP
assert(omp_in_parallel()==0); assert(omp_in_parallel()==0);

View File

@ -49,8 +49,13 @@ NAMESPACE_BEGIN(Grid);
#ifdef POINTER_CACHE #ifdef POINTER_CACHE
class PointerCache { class PointerCache {
private: private:
/*Pinning pages is costly*/
/*Could maintain separate large and small allocation caches*/
#ifdef GRID_NVCC
static const int Ncache=128;
#else
static const int Ncache=8; static const int Ncache=8;
#endif
static int victim; static int victim;
typedef struct { typedef struct {
@ -63,7 +68,6 @@ private:
public: public:
static void *Insert(void *ptr,size_t bytes) ; static void *Insert(void *ptr,size_t bytes) ;
static void *Lookup(size_t bytes) ; static void *Lookup(size_t bytes) ;
@ -170,6 +174,7 @@ public:
// Unified (managed) memory // Unified (managed) memory
//////////////////////////////////// ////////////////////////////////////
if ( ptr == (_Tp *) NULL ) { if ( ptr == (_Tp *) NULL ) {
// printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout);
auto err = cudaMallocManaged((void **)&ptr,bytes); auto err = cudaMallocManaged((void **)&ptr,bytes);
if( err != cudaSuccess ) { if( err != cudaSuccess ) {
ptr = (_Tp *) NULL; ptr = (_Tp *) NULL;

View File

@ -37,19 +37,18 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
GridBase *grid = l.Grid(); GridBase *grid = l.Grid();
int Nsimd = grid->iSites(); int Nsimd = grid->iSites();
Coordinate gcoor;
ExtractBuffer<scalar_type> mergebuf(Nsimd);
vector_type vI;
auto l_v = l.View(); auto l_v = l.View();
for(int o=0;o<grid->oSites();o++){ thread_for( o, grid->oSites(), {
vector_type vI;
Coordinate gcoor;
ExtractBuffer<scalar_type> mergebuf(Nsimd);
for(int i=0;i<grid->iSites();i++){ for(int i=0;i<grid->iSites();i++){
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
mergebuf[i]=(Integer)gcoor[mu]; mergebuf[i]=(Integer)gcoor[mu];
} }
merge<vector_type,scalar_type>(vI,mergebuf); merge<vector_type,scalar_type>(vI,mergebuf);
l_v[o]=vI; l_v[o]=vI;
} });
}; };
// LatticeCoordinate(); // LatticeCoordinate();

View File

@ -1,5 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_transfer.h Source file: ./lib/lattice/Lattice_transfer.h
@ -83,12 +82,35 @@ template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Latti
}); });
} }
template<class vobj,class CComplex,int nbasis> template<class vobj,class CComplex,int nbasis>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData, inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
Lattice<CComplex> ip(coarse);
// auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View();
auto ip_ = ip.View();
for(int v=0;v<nbasis;v++) {
blockInnerProduct(ip,Basis[v],fineData);
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
coalescedWrite(coarseData_[sc](v),ip_(sc));
});
}
}
template<class vobj,class CComplex,int nbasis>
inline void blockProject1(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData, const Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis) const std::vector<Lattice<vobj> > &Basis)
{ {
typedef iVector<CComplex,nbasis > coarseSiteData;
coarseSiteData elide;
typedef decltype(coalescedRead(elide)) ScalarComplex;
GridBase * fine = fineData.Grid(); GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid(); GridBase * coarse= coarseData.Grid();
int _ndimension = coarse->_ndimension; int _ndimension = coarse->_ndimension;
@ -106,26 +128,40 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
} }
int blockVol = fine->oSites()/coarse->oSites();
coarseData=Zero(); coarseData=Zero();
auto fineData_ = fineData.View(); auto fineData_ = fineData.View();
auto coarseData_ = coarseData.View(); auto coarseData_ = coarseData.View();
// Loop over coars parallel, and then loop over fine associated with coarse. ////////////////////////////////////////////////////////////////////////////////////////////////////////
thread_for( sf, fine->oSites(), { // To make this lock free, loop over coars parallel, and then loop over fine associated with coarse.
int sc; // Otherwise do fine inner product per site, and make the update atomic
Coordinate coor_c(_ndimension); ////////////////////////////////////////////////////////////////////////////////////////////////////////
Coordinate coor_f(_ndimension); accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), {
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
thread_critical { auto sc=sci/nbasis;
for(int i=0;i<nbasis;i++) { auto i=sci%nbasis;
auto Basis_ = Basis[i].View(); auto Basis_ = Basis[i].View();
coarseData_[sc](i)=coarseData_[sc](i) + innerProduct(Basis_[sf],fineData_[sf]);
} Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
int sf;
decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero();
for(int sb=0;sb<blockVol;sb++){
Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r);
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
reduce=reduce+innerProduct(Basis_(sf),fineData_(sf));
} }
coalescedWrite(coarseData_[sc](i),reduce);
}); });
return; return;
} }
@ -160,7 +196,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
auto fineY_ = fineY.View(); auto fineY_ = fineY.View();
auto coarseA_= coarseA.View(); auto coarseA_= coarseA.View();
thread_for(sf, fine->oSites(), { accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
int sc; int sc;
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
@ -171,7 +207,7 @@ inline void blockZAXPY(Lattice<vobj> &fineZ,
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
// z = A x + y // z = A x + y
fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf]; coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf));
}); });
@ -196,7 +232,7 @@ inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
fine_inner = localInnerProduct(fineX,fineY); fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner); blockSum(coarse_inner,fine_inner);
thread_for(ss, coarse->oSites(),{ accelerator_for(ss, coarse->oSites(), 1, {
CoarseInner_[ss] = coarse_inner_[ss]; CoarseInner_[ss] = coarse_inner_[ss];
}); });
} }
@ -226,23 +262,29 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
for(int d=0 ; d<_ndimension;d++){ for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
} }
int blockVol = fine->oSites()/coarse->oSites();
// Turn this around to loop threaded over sc and interior loop // Turn this around to loop threaded over sc and interior loop
// over sf would thread better // over sf would thread better
coarseData=Zero();
auto coarseData_ = coarseData.View(); auto coarseData_ = coarseData.View();
auto fineData_ = fineData.View(); auto fineData_ = fineData.View();
thread_for(sf,fine->oSites(),{ accelerator_for(sc,coarse->oSites(),1,{
int sc;
// One thread per sub block
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension); Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate
coarseData_[sc]=Zero();
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int sb=0;sb<blockVol;sb++){
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); int sf;
Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions);
thread_critical {
coarseData_[sc]=coarseData_[sc]+fineData_[sf]; coarseData_[sc]=coarseData_[sc]+fineData_[sf];
} }
@ -296,6 +338,7 @@ inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> >
} }
} }
#if 0
template<class vobj,class CComplex,int nbasis> template<class vobj,class CComplex,int nbasis>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData, inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData, Lattice<vobj> &fineData,
@ -321,7 +364,7 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
auto coarseData_ = coarseData.View(); auto coarseData_ = coarseData.View();
// Loop with a cache friendly loop ordering // Loop with a cache friendly loop ordering
thread_for(sf,fine->oSites(),{ accelerator_for(sf,fine->oSites(),1,{
int sc; int sc;
Coordinate coor_c(_ndimension); Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension); Coordinate coor_f(_ndimension);
@ -332,13 +375,35 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
for(int i=0;i<nbasis;i++) { for(int i=0;i<nbasis;i++) {
auto basis_ = Basis[i].View(); auto basis_ = Basis[i].View();
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]; if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]);
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]; else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]);
} }
}); });
return; return;
} }
#else
template<class vobj,class CComplex,int nbasis>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
fineData=Zero();
for(int i=0;i<nbasis;i++) {
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
Lattice<CComplex> cip(coarse);
auto cip_ = cip.View();
auto ip_ = ip.View();
accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
coalescedWrite(cip_[sc], ip_(sc)());
});
blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
}
}
#endif
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars. // Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
// Simd layouts need not match since we use peek/poke Local // Simd layouts need not match since we use peek/poke Local

View File

@ -101,7 +101,8 @@ public:
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
// Efficient support for multigrid coarsening // Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void Meooe5D (const FermionField &in, FermionField &out); void Meooe5D (const FermionField &in, FermionField &out);
void MeooeDag5D (const FermionField &in, FermionField &out); void MeooeDag5D (const FermionField &in, FermionField &out);

View File

@ -62,14 +62,15 @@ public:
// Efficient support for multigrid coarsening // Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Physical surface field utilities // Physical surface field utilities
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case // virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case
// virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case // virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors // Constructors
ContinuedFractionFermion5D(GaugeField &_Umu, ContinuedFractionFermion5D(GaugeField &_Umu,

View File

@ -89,6 +89,7 @@ public:
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};

View File

@ -103,6 +103,7 @@ public:
// Multigrid assistance; force term uses too // Multigrid assistance; force term uses too
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp); void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////

View File

@ -86,7 +86,8 @@ public:
void MooeeDag (const FermionField &in, FermionField &out); void MooeeDag (const FermionField &in, FermionField &out);
void MooeeInvDag (const FermionField &in, FermionField &out); void MooeeInvDag (const FermionField &in, FermionField &out);
void Mdir (const FermionField &in, FermionField &out,int dir,int disp); void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
// These can be overridden by fancy 5d chiral action // These can be overridden by fancy 5d chiral action

View File

@ -67,12 +67,13 @@ public:
// Efficient support for multigrid coarsening // Efficient support for multigrid coarsening
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp);
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Physical surface field utilities // Physical surface field utilities
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d);
virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d);
// Constructors // Constructors
PartialFractionFermion5D(GaugeField &_Umu, PartialFractionFermion5D(GaugeField &_Umu,

View File

@ -115,9 +115,10 @@ public:
// Multigrid assistance; force term uses too // Multigrid assistance; force term uses too
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp); void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void MdirAll(const FermionField &in, std::vector<FermionField> &out);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp, void DhopDirAll(const FermionField &in, std::vector<FermionField> &out);
int gamma, int dag); void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Extra methods added by derived // Extra methods added by derived

View File

@ -111,15 +111,16 @@ public:
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);}; virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
// These can be overridden by fancy 5d chiral action // These can be overridden by fancy 5d chiral action
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
// Implement hopping term non-hermitian hopping term; half cb or both // Implement hopping term non-hermitian hopping term; half cb or both
// Implement s-diagonal DW // Implement s-diagonal DW
@ -131,6 +132,9 @@ public:
// add a DhopComm // add a DhopComm
// -- suboptimal interface will presently trigger multiple comms. // -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
void DhopDirComms(const FermionField &in);
void DhopDirCalc(const FermionField &in, FermionField &out,int point);
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// New methods added // New methods added

View File

@ -60,6 +60,9 @@ public:
int Ls, int Nsite, const FermionField &in, FermionField &out, int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ; int interior=1,int exterior=1) ;
static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
@ -100,9 +103,18 @@ public:
private: private:
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirTp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
static accelerator_inline void DhopDirTm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
// Specialised variants // Specialised variants
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);

View File

@ -54,6 +54,14 @@ public:
_Mat.Mdir(in,tmp,dir,disp); _Mat.Mdir(in,tmp,dir,disp);
G5R5(out,tmp); G5R5(out,tmp);
} }
void OpDirAll(const Field &in, std::vector<Field> &out) {
Field tmp(in.Grid());
_Mat.MdirAll(in,out);
for(int p=0;p<out.size();p++) {
tmp=out[p];
G5R5(out[p],tmp);
}
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
@ -96,6 +104,12 @@ public:
_Mat.Mdir(in,tmp,dir,disp); _Mat.Mdir(in,tmp,dir,disp);
out=g5*tmp; out=g5*tmp;
} }
void OpDirAll(const Field &in, std::vector<Field> &out) {
_Mat.MdirAll(in,out);
for(int p=0;p<out.size();p++) {
out[p]=g5*out[p];
}
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){

View File

@ -389,6 +389,14 @@ void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,in
Meo5D(psi,tmp); Meo5D(psi,tmp);
this->DhopDir(tmp,chi,dir,disp); this->DhopDir(tmp,chi,dir,disp);
} }
template<class Impl>
void CayleyFermion5D<Impl>::MdirAll(const FermionField &psi, std::vector<FermionField> &out)
{
FermionField tmp(psi.Grid());
Meo5D(psi,tmp);
this->DhopDirAll(tmp,out);
}
// force terms; five routines; default to Dhop on diagonal // force terms; five routines; default to Dhop on diagonal
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)

View File

@ -143,6 +143,25 @@ void ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFi
} }
} }
template<class Impl> template<class Impl>
void ContinuedFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi)
{
int Ls = this->Ls;
this->DhopDirAll(psi,chi); // Dslash on diagonal. g5 Dslash is hermitian
for(int p=0;p<chi.size();p++){
int sign=1;
for(int s=0;s<Ls;s++){
if ( s==(Ls-1) ){
ag5xpby_ssp(chi[p],Beta[s]*ZoloHiInv,chi[p],0.0,chi[p],s,s);
} else {
ag5xpby_ssp(chi[p],cc[s]*Beta[s]*sign*ZoloHiInv,chi[p],0.0,chi[p],s,s);
}
sign=-sign;
}
}
}
template<class Impl>
void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi) void ContinuedFractionFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{ {
int Ls = this->Ls; int Ls = this->Ls;

View File

@ -538,10 +538,16 @@ void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
// Implement the general interface. Here we use SAME mass on all slices // Implement the general interface. Here we use SAME mass on all slices
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
{
DhopDir(in, out, dir, disp); DhopDir(in, out, dir, disp);
} }
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
{
assert(0);
}
template <class Impl>
RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) { RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) {
out.Checkerboard() = in.Checkerboard(); out.Checkerboard() = in.Checkerboard();
Dhop(in, out, DaggerNo); Dhop(in, out, DaggerNo);

View File

@ -362,12 +362,19 @@ void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField
} }
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
{
DhopDir(in, out, dir, disp); DhopDir(in, out, dir, disp);
} }
template <class Impl>
void ImprovedStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
{
assert(0); // Not implemented yet
}
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
{
Compressor compressor; Compressor compressor;
Stencil.HaloExchange(in, compressor); Stencil.HaloExchange(in, compressor);
@ -380,6 +387,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
}); });
}; };
template <class Impl> template <class Impl>
void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &U,
@ -404,7 +412,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
#ifdef GRID_OMP #ifdef GRID_OMP
Compressor compressor; Compressor compressor;
int len = U.Grid()->oSites(); int len = U.Grid()->oSites();
const int LLs = 1;
DhopTotalTime -= usecond(); DhopTotalTime -= usecond();

View File

@ -31,7 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class Impl> template<class Impl>
void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
// this does both dag and undag but is trivial; make a common helper routing // this does both dag and undag but is trivial; make a common helper routing
int Ls = this->Ls; int Ls = this->Ls;
@ -45,8 +45,25 @@ void PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFiel
ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1); ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);
} }
ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1); ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1);
} }
template<class Impl>
void PartialFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi){
// this does both dag and undag but is trivial; make a common helper routing
int Ls = this->Ls;
this->DhopDirAll(psi,chi);
for(int point=0;point<chi.size();point++){
int nblock=(Ls-1)/2;
for(int b=0;b<nblock;b++){
int s = 2*b;
ag5xpby_ssp(chi[point],-scale,chi[point],0.0,chi[point],s,s);
ag5xpby_ssp(chi[point], scale,chi[point],0.0,chi[point],s+1,s+1);
}
ag5xpby_ssp(chi[point],p[nblock]*scale/amax,chi[point],0.0,chi[point],Ls-1,Ls-1);
}
}
template<class Impl> template<class Impl>
void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag) void PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag)
{ {

View File

@ -241,6 +241,15 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma); Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
}; };
template<class Impl>
void WilsonFermion5D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
{
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in,compressor);
uint64_t Nsite = Umu.Grid()->oSites();
Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out);
};
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,

View File

@ -319,28 +319,51 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d
} }
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)
{
DhopDir(in, out, dir, disp); DhopDir(in, out, dir, disp);
} }
template <class Impl>
void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)
{
DhopDirAll(in, out);
}
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
{ {
Compressor compressor(DaggerNo);
Stencil.HaloExchange(in, compressor);
int skip = (disp == 1) ? 0 : 1; int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4; int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4; int gamma = dir + (1 - skip) * 4;
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo); DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
}; };
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
{ {
Compressor compressor(dag); Compressor compressor(DaggerNo);
Stencil.HaloExchange(in, compressor); Stencil.HaloExchange(in, compressor);
assert((out.size()==8)||(out.size()==9));
for(int dir=0;dir<Nd;dir++){
for(int disp=-1;disp<=1;disp+=2){
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
}
}
}
template <class Impl>
void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)
{
int Ls=1; int Ls=1;
int Nsite=in.oSites(); uint64_t Nsite=in.oSites();
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma); Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
}; };
@ -348,7 +371,8 @@ template <class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeField &U,
const FermionField &in, const FermionField &in,
FermionField &out, int dag) { FermionField &out, int dag)
{
#ifdef GRID_OMP #ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,in,out,dag); DhopInternalOverlappedComms(st,lo,U,in,out,dag);

View File

@ -91,8 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
} \ } \
synchronise(); synchronise();
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \ #define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \
if (gamma == Dir) { \
if (SE->_is_local ) { \ if (SE->_is_local ) { \
int perm= SE->_permute; \ int perm= SE->_permute; \
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
@ -102,10 +101,14 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
} \ } \
synchronise(); \ synchronise(); \
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \ Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
Recon(result, Uchi); \ Recon(result, Uchi);
synchronise(); \
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
if (gamma == Dir) { \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon); \
} }
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute // All legs kernels ; comms then compute
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
@ -284,6 +287,35 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
} }
}; };
#define DhopDirMacro(Dir,spProj,spRecon) \
template <class Impl> \
void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
{ \
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; \
typedef decltype(coalescedRead(in[0])) calcSpinor; \
calcHalfSpinor chi; \
calcSpinor result; \
calcHalfSpinor Uchi; \
StencilEntry *SE; \
int ptype; \
const int Nsimd = SiteHalfSpinor::Nsimd(); \
const int lane=SIMTlane(Nsimd); \
\
SE = st.GetEntry(ptype, dir, sF); \
GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \
coalescedWrite(out[sF], result,lane); \
}
DhopDirMacro(Xp,spProjXp,spReconXp);
DhopDirMacro(Yp,spProjYp,spReconYp);
DhopDirMacro(Zp,spProjZp,spReconZp);
DhopDirMacro(Tp,spProjTp,spReconTp);
DhopDirMacro(Xm,spProjXm,spReconXm);
DhopDirMacro(Ym,spProjYm,spReconYm);
DhopDirMacro(Zm,spProjZm,spReconZm);
DhopDirMacro(Tm,spProjTm,spReconTm);
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
@ -299,18 +331,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
const int lane=SIMTlane(Nsimd); const int lane=SIMTlane(Nsimd);
SE = st.GetEntry(ptype, dir, sF); SE = st.GetEntry(ptype, dir, sF);
if (gamma == Xp) { GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp);
if (SE->_is_local ) {
int perm= SE->_permute;
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);
spProjXp(chi,tmp);
} else {
chi = coalescedRead(buf[SE->_offset],lane);
}
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
spReconXp(result, Uchi);
}
GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp); GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp);
GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp); GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp);
GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp); GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp);
@ -321,6 +342,38 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
coalescedWrite(out[sF], result,lane); coalescedWrite(out[sF], result,lane);
} }
template <class Impl>
void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, std::vector<FermionField> &out)
{
auto U_v = U.View();
auto in_v = in.View();
auto st_v = st.View();
auto out_Xm = out[0].View();
auto out_Ym = out[1].View();
auto out_Zm = out[2].View();
auto out_Tm = out[3].View();
auto out_Xp = out[4].View();
auto out_Yp = out[5].View();
auto out_Zp = out[6].View();
auto out_Tp = out[7].View();
accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{
int sU=sss/Ls;
int sF =sss;
DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0);
DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1);
DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2);
DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3);
DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4);
DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5);
DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6);
DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7);
});
}
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
@ -332,13 +385,32 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
auto in_v = in.View(); auto in_v = in.View();
auto out_v = out.View(); auto out_v = out.View();
auto st_v = st.View(); auto st_v = st.View();
accelerator_for(ss,Nsite,Simd::Nsimd(),{ #define LoopBody(Dir) \
for(int s=0;s<Ls;s++){ case Dir : \
int sU=ss; accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \
int sF = s+Ls*sU; for(int s=0;s<Ls;s++){ \
DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma); int sU=ss; \
} int sF = s+Ls*sU; \
}); DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\
} \
}); \
break;
switch(gamma){
LoopBody(Xp);
LoopBody(Yp);
LoopBody(Zp);
LoopBody(Tp);
LoopBody(Xm);
LoopBody(Ym);
LoopBody(Zm);
LoopBody(Tm);
default:
assert(0);
break;
}
#undef LoopBody
} }
#define KERNEL_CALLNB(A) \ #define KERNEL_CALLNB(A) \

View File

@ -92,6 +92,7 @@ public:
}; };
void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);} void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);}
void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);}
void Mdiag(const GaugeField&, GaugeField&){ assert(0);} void Mdiag(const GaugeField&, GaugeField&){ assert(0);}
void ImportGauge(const GaugeField& _U) { void ImportGauge(const GaugeField& _U) {

View File

@ -403,6 +403,10 @@ namespace Optimization {
accelerator_inline GpuVectorRD operator()(GpuVectorRD a, GpuVectorRD b){ accelerator_inline GpuVectorRD operator()(GpuVectorRD a, GpuVectorRD b){
return a/b; return a/b;
} }
accelerator_inline GpuVectorI operator()(GpuVectorI a, GpuVectorI b){
return a/b;
}
// Danger -- element wise divide fro complex, not complex div. // Danger -- element wise divide fro complex, not complex div.
// See Grid_vector_types.h lines around 735, applied after "toReal" // See Grid_vector_types.h lines around 735, applied after "toReal"
accelerator_inline GpuVectorCF operator()(GpuVectorCF a, GpuVectorCF b){ accelerator_inline GpuVectorCF operator()(GpuVectorCF a, GpuVectorCF b){

View File

@ -628,6 +628,7 @@ void Grid_debug_handler_init(void)
sigaction(SIGSEGV,&sa,NULL); sigaction(SIGSEGV,&sa,NULL);
sigaction(SIGTRAP,&sa,NULL); sigaction(SIGTRAP,&sa,NULL);
sigaction(SIGBUS,&sa,NULL); sigaction(SIGBUS,&sa,NULL);
sigaction(SIGUSR2,&sa,NULL);
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO); feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);

View File

@ -1,4 +1,4 @@
# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid) # Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:GridBasedSoftware_Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=GridBasedSoftware_Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
**Data parallel C++ mathematical object library.** **Data parallel C++ mathematical object library.**

View File

@ -280,7 +280,8 @@ case ${CXX} in
# CXXLD="nvcc -v -link" # CXXLD="nvcc -v -link"
CXX="nvcc -x cu " CXX="nvcc -x cu "
CXXLD="nvcc -link" CXXLD="nvcc -link"
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr" # CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr"
CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
if test $ac_openmp = yes; then if test $ac_openmp = yes; then
CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
fi fi

View File

@ -73,6 +73,7 @@ public:
} }
// Support for coarsening to a multigrid // Support for coarsening to a multigrid
void OpDirAll (const Field &in, std::vector<Field> &out){};
void OpDiag (const Field &in, Field &out) {}; void OpDiag (const Field &in, Field &out) {};
void OpDir (const Field &in, Field &out,int dir,int disp){}; void OpDir (const Field &in, Field &out,int dir,int disp){};

View File

@ -1,4 +1,6 @@
/*************************************************************************************
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -29,323 +31,174 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h> #include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidual.h>
//#include <algorithms/iterative/PrecConjugateResidual.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
; /* Params
* Grid:
* block1(4)
* block2(4)
*
* Subspace
* * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100
* * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100
class myclass: Serializable { * Smoother:
public: * * Fine: Cheby(hi, lo, order) -- 60,0.5,10
* * Coarse: Cheby(hi, lo, order) -- 12,0.1,4
GRID_SERIALIZABLE_CLASS_MEMBERS(myclass,
int, domaindecompose,
int, domainsize,
int, order,
int, Ls,
double, mq,
double, lo,
double, hi,
int, steps);
myclass(){};
};
myclass params;
* Lanczos:
* CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61
*/
RealD InverseApproximation(RealD x){ RealD InverseApproximation(RealD x){
return 1.0/x; return 1.0/x;
} }
template<class Fobj,class CComplex,int nbasis, class Matrix> template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
Chebyshev<Field> Cheby;
ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) :
_SmootherOperator(SmootherOperator),
_SmootherMatrix(SmootherMatrix),
Cheby(_lo,_hi,_ord,InverseApproximation)
{};
void operator() (const Field &in, Field &out)
{
Field tmp(in.Grid());
MdagMLinearOperator<Matrix,Field> MdagMOp(_SmootherMatrix);
_SmootherOperator.AdjOp(in,tmp);
Cheby(MdagMOp,tmp,out);
}
};
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
RealD tol;
RealD shift;
int maxit;
MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) :
shift(_shift),tol(_tol),maxit(_maxit),
SmootherOperator(_SmootherOperator),
SmootherMatrix(_SmootherMatrix)
{};
void operator() (const Field &in, Field &out)
{
ZeroGuesser<Field> Guess;
ConjugateGradient<Field> CG(tol,maxit,false);
Field src(in.Grid());
ShiftedMdagMLinearOperator<SparseMatrixBase<Field>,Field> MdagMOp(SmootherMatrix,shift);
SmootherOperator.AdjOp(in,src);
Guess(src,out);
CG(MdagMOp,src,out);
}
};
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > { class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public: public:
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates; typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
typedef typename Aggregation<Fobj,CComplex,nbasis>::siteVector siteVector;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseScalar CoarseScalar;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix; typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField; typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField FineField;
typedef LinearOperatorBase<FineField> FineOperator; typedef LinearOperatorBase<FineField> FineOperator;
typedef LinearFunction <FineField> FineSmoother;
Aggregates & _Aggregates; Aggregates & _Aggregates;
CoarseOperator & _CoarseOperator; CoarseOperator & _CoarseOperator;
Matrix & _FineMatrix; Matrix & _FineMatrix;
FineOperator & _FineOperator; FineOperator & _FineOperator;
Matrix & _SmootherMatrix; Guesser & _Guess;
FineOperator & _SmootherOperator; FineSmoother & _Smoother;
CoarseSolver & _CoarseSolve;
int level; void Level(int lv) {level = lv; };
#define GridLogLevel std::cout << GridLogMessage <<std::string(level,'\t')<< " Level "<<level <<" "
// Constructor
MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse,
FineOperator &Fine,Matrix &FineMatrix, FineOperator &Fine,Matrix &FineMatrix,
FineOperator &Smooth,Matrix &SmootherMatrix) FineSmoother &Smoother,
Guesser &Guess_,
CoarseSolver &CoarseSolve_)
: _Aggregates(Agg), : _Aggregates(Agg),
_CoarseOperator(Coarse), _CoarseOperator(Coarse),
_FineOperator(Fine), _FineOperator(Fine),
_FineMatrix(FineMatrix), _FineMatrix(FineMatrix),
_SmootherOperator(Smooth), _Smoother(Smoother),
_SmootherMatrix(SmootherMatrix) _Guess(Guess_),
_CoarseSolve(CoarseSolve_),
level(1) { }
virtual void operator()(const FineField &in, FineField & out)
{ {
}
void PowerMethod(const FineField &in) {
FineField p1(in.Grid());
FineField p2(in.Grid());
MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
p1=in;
for(int i=0;i<20;i++){
RealD absp1=std::sqrt(norm2(p1));
fMdagMOp.HermOp(p1,p2);// this is the G5 herm bit
// _FineOperator.Op(p1,p2);// this is the G5 herm bit
RealD absp2=std::sqrt(norm2(p2));
if(i%10==9)
std::cout<<GridLogMessage << "Power method on mdagm "<<i<<" " << absp2/absp1<<std::endl;
p1=p2*(1.0/std::sqrt(absp2));
}
}
void operator()(const FineField &in, FineField & out) {
operatorCheby(in,out);
}
////////////////////////////////////////////////////////////////////////
// ADEF2: [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
// ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in
////////////////////////////////////////////////////////////////////////
#if 1
void operatorADEF2(const FineField &in, FineField & out) {
CoarseVector Csrc(_CoarseOperator.Grid()); CoarseVector Csrc(_CoarseOperator.Grid());
CoarseVector Ctmp(_CoarseOperator.Grid());
CoarseVector Csol(_CoarseOperator.Grid()); CoarseVector Csol(_CoarseOperator.Grid());
ConjugateGradient<CoarseVector> CG(1.0e-10,100000);
ConjugateGradient<FineField> fCG(3.0e-2,1000);
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
MdagMLinearOperator<CoarseOperator,CoarseVector> MdagMOp(_CoarseOperator);
MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
FineField tmp(in.Grid());
FineField res(in.Grid());
FineField Min(in.Grid());
// Monitor completeness of low mode space
_Aggregates.ProjectToSubspace (Csrc,in);
_Aggregates.PromoteFromSubspace(Csrc,out);
std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
// [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
_FineOperator.Op(in,tmp);// this is the G5 herm bit
fCG(fMdagMOp,tmp,Min); // solves MdagM = g5 M g5M
// Monitor completeness of low mode space
_Aggregates.ProjectToSubspace (Csrc,Min);
_Aggregates.PromoteFromSubspace(Csrc,out);
std::cout<<GridLogMessage<<"Completeness Min: "<<std::sqrt(norm2(out)/norm2(Min))<<std::endl;
_FineOperator.Op(Min,tmp);
tmp = in - tmp; // in - A Min
Csol=Zero();
_Aggregates.ProjectToSubspace (Csrc,tmp);
HermOp.AdjOp(Csrc,Ctmp);// Normal equations
CG(MdagMOp,Ctmp,Csol);
HermOp.Op(Csol,Ctmp);
Ctmp=Ctmp-Csrc;
std::cout<<GridLogMessage<<"coarse space true residual "<<std::sqrt(norm2(Ctmp)/norm2(Csrc))<<std::endl;
_Aggregates.PromoteFromSubspace(Csol,out);
_FineOperator.Op(out,res);
res=res-tmp;
std::cout<<GridLogMessage<<"promoted sol residual "<<std::sqrt(norm2(res)/norm2(tmp))<<std::endl;
_Aggregates.ProjectToSubspace (Csrc,res);
std::cout<<GridLogMessage<<"coarse space proj of residual "<<norm2(Csrc)<<std::endl;
out = out+Min; // additive coarse space correction
// out = Min; // no additive coarse space correction
_FineOperator.Op(out,tmp);
tmp=tmp-in; // tmp is new residual
std::cout<<GridLogMessage<< " Preconditioner in " << norm2(in)<<std::endl;
std::cout<<GridLogMessage<< " Preconditioner out " << norm2(out)<<std::endl;
std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
}
#endif
// ADEF1: [MP+Q ] in =M [1 - A Q] in + Q in
#if 1
void operatorADEF1(const FineField &in, FineField & out) {
CoarseVector Csrc(_CoarseOperator.Grid());
CoarseVector Ctmp(_CoarseOperator.Grid());
CoarseVector Csol(_CoarseOperator.Grid()); Csol=Zero();
ConjugateGradient<CoarseVector> CG(1.0e-10,100000);
ConjugateGradient<FineField> fCG(3.0e-2,1000);
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
MdagMLinearOperator<CoarseOperator,CoarseVector> MdagMOp(_CoarseOperator);
ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix,0.1);
FineField tmp(in.Grid());
FineField res(in.Grid());
FineField Qin(in.Grid());
// Monitor completeness of low mode space
// _Aggregates.ProjectToSubspace (Csrc,in);
// _Aggregates.PromoteFromSubspace(Csrc,out);
// std::cout<<GridLogMessage<<"Coarse Grid Preconditioner\nCompleteness in: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
_Aggregates.ProjectToSubspace (Csrc,in);
HermOp.AdjOp(Csrc,Ctmp);// Normal equations
CG(MdagMOp,Ctmp,Csol);
_Aggregates.PromoteFromSubspace(Csol,Qin);
// Qin=0;
_FineOperator.Op(Qin,tmp);// A Q in
tmp = in - tmp; // in - A Q in
_FineOperator.Op(tmp,res);// this is the G5 herm bit
fCG(fMdagMOp,res,out); // solves MdagM = g5 M g5M
out = out + Qin;
_FineOperator.Op(out,tmp);
tmp=tmp-in; // tmp is new residual
std::cout<<GridLogMessage<<"preconditioner thinks residual is "<<std::sqrt(norm2(tmp)/norm2(in))<<std::endl;
}
#endif
void SmootherTest (const FineField & in){
FineField vec1(in.Grid());
FineField vec2(in.Grid());
RealD lo[3] = { 0.5, 1.0, 2.0};
// MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
RealD Ni,r;
Ni = norm2(in);
for(int ilo=0;ilo<3;ilo++){
for(int ord=5;ord<50;ord*=2){
std::cout << " lo "<<lo[ilo]<<" order "<<ord<<std::endl;
_SmootherOperator.AdjOp(in,vec1);
Chebyshev<FineField> Cheby (lo[ilo],70.0,ord,InverseApproximation);
Cheby(fMdagMOp,vec1,vec2); // solves MdagM = g5 M g5M
_FineOperator.Op(vec2,vec1);// this is the G5 herm bit
vec1 = in - vec1; // tmp = in - A Min
r=norm2(vec1);
std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
}
}
}
void operatorCheby(const FineField &in, FineField & out) {
CoarseVector Csrc(_CoarseOperator.Grid());
CoarseVector Ctmp(_CoarseOperator.Grid());
CoarseVector Csol(_CoarseOperator.Grid()); Csol=Zero();
ConjugateGradient<CoarseVector> CG(3.0e-3,100000);
HermitianLinearOperator<CoarseOperator,CoarseVector> HermOp(_CoarseOperator);
MdagMLinearOperator<CoarseOperator,CoarseVector> MdagMOp(_CoarseOperator);
// MdagMLinearOperator<Matrix,FineField> fMdagMOp(_FineMatrix);
ShiftedMdagMLinearOperator<Matrix,FineField> fMdagMOp(_SmootherMatrix,0.0);
FineField vec1(in.Grid()); FineField vec1(in.Grid());
FineField vec2(in.Grid()); FineField vec2(in.Grid());
Chebyshev<FineField> Cheby (params.lo,params.hi,params.order,InverseApproximation); double t;
Chebyshev<FineField> ChebyAccu(params.lo,params.hi,params.order,InverseApproximation); // Fine Smoother
t=-usecond();
_Smoother(in,out);
t+=usecond();
GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
_Aggregates.ProjectToSubspace (Csrc,in); // Update the residual
_FineOperator.Op(out,vec1); sub(vec1, in ,vec1);
// _Aggregates.PromoteFromSubspace(Csrc,out);
// std::cout<<GridLogMessage<<"Completeness: "<<std::sqrt(norm2(out)/norm2(in))<<std::endl;
// ofstream fout("smoother");
// Cheby.csv(fout);
// V11 multigrid.
// Use a fixed chebyshev and hope hermiticity helps.
// To make a working smoother for indefinite operator
// must multiply by "Mdag" (ouch loses all low mode content)
// and apply to poly approx of (mdagm)^-1.
// so that we end up with an odd polynomial.
RealD Ni = norm2(in);
_SmootherOperator.AdjOp(in,vec1);// this is the G5 herm bit
ChebyAccu(fMdagMOp,vec1,out); // solves MdagM = g5 M g5M
// Update with residual for out
_FineOperator.Op(out,vec1);// this is the G5 herm bit
vec1 = in - vec1; // tmp = in - A Min
RealD r = norm2(vec1);
std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<< " " << r << " " << Ni <<std::endl;
// Fine to Coarse
t=-usecond();
_Aggregates.ProjectToSubspace (Csrc,vec1); _Aggregates.ProjectToSubspace (Csrc,vec1);
t+=usecond();
GridLogLevel << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;
HermOp.AdjOp(Csrc,Ctmp);// Normal equations // This appears to be zero. // Coarse correction
CG(MdagMOp,Ctmp,Csol); t=-usecond();
_Aggregates.PromoteFromSubspace(Csol,vec1); // Ass^{-1} [in - A Min]_s _CoarseSolve(Csrc,Csol);
// Q = Q[in - A Min] t+=usecond();
out = out+vec1; GridLogLevel << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;
// Three preconditioner smoothing -- hermitian if C3 = C1 // Coarse to Fine
// Recompute error t=-usecond();
_FineOperator.Op(out,vec1);// this is the G5 herm bit _Aggregates.PromoteFromSubspace(Csol,vec1);
vec1 = in - vec1; // tmp = in - A Min add(out,out,vec1);
r=norm2(vec1); t+=usecond();
GridLogLevel << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;
std::cout<<GridLogMessage << "Coarse resid "<<std::sqrt(r/Ni)<<std::endl; // Residual
_FineOperator.Op(out,vec1); sub(vec1 ,in , vec1);
// Reapply smoother // Fine Smoother
_SmootherOperator.Op(vec1,vec2); // this is the G5 herm bit t=-usecond();
ChebyAccu(fMdagMOp,vec2,vec1); // solves MdagM = g5 M g5M _Smoother(vec1,vec2);
t+=usecond();
out =out+vec1; GridLogLevel << "Smoother took "<< t/1000.0<< "ms" <<std::endl;
vec1 = in - vec1; // tmp = in - A Min
r=norm2(vec1);
std::cout<<GridLogMessage << "Smoother resid "<<std::sqrt(r/Ni)<<std::endl;
add( out,out,vec2);
} }
}; };
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
XmlReader RD("params.xml"); const int Ls=16;
read(RD,"params",params);
std::cout<<"Params: Order "<<params.order<<"["<<params.lo<<","<<params.hi<<"]"<< " steps "<<params.steps<<std::endl;
const int Ls=params.Ls;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
@ -357,14 +210,22 @@ int main (int argc, char ** argv)
// Construct a coarsened grid; utility for this? // Construct a coarsened grid; utility for this?
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
std::vector<int> block ({2,2,2,2}); std::vector<int> block ({2,2,2,2});
std::vector<int> blockc ({2,2,2,2});
const int nbasis= 32; const int nbasis= 32;
const int nbasisc= 32;
auto clatt = GridDefaultLatt(); auto clatt = GridDefaultLatt();
for(int d=0;d<clatt.size();d++){ for(int d=0;d<clatt.size();d++){
clatt[d] = clatt[d]/block[d]; clatt[d] = clatt[d]/block[d];
} }
auto cclatt = clatt;
for(int d=0;d<clatt.size();d++){
cclatt[d] = clatt[d]/blockc[d];
}
GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());; GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d); GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
GridCartesian *CoarseCoarse4d = SpaceTimeGrid::makeFourDimGrid(cclatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
GridCartesian *CoarseCoarse5d = SpaceTimeGrid::makeFiveDimGrid(1,CoarseCoarse4d);
std::vector<int> seeds4({1,2,3,4}); std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8}); std::vector<int> seeds5({5,6,7,8});
@ -372,186 +233,167 @@ int main (int argc, char ** argv)
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);
Gamma g5(Gamma::Algebra::Gamma5);
LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src; LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src;
LatticeFermion result(FGrid); result=Zero(); LatticeFermion result(FGrid);
LatticeFermion ref(FGrid); ref=Zero();
LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid);
LatticeGaugeField Umu(UGrid); LatticeGaugeField Umu(UGrid);
LatticeGaugeField UmuDD(UGrid);
LatticeColourMatrix U(UGrid);
LatticeColourMatrix zz(UGrid);
FieldMetaData header; FieldMetaData header;
std::string file("./ckpoint_lat.4000"); std::string file("./ckpoint_lat.4000");
NerscIO::readConfiguration(Umu,header,file); NerscIO::readConfiguration(Umu,header,file);
if ( params.domaindecompose ) {
Lattice<iScalar<vInteger> > coor(UGrid);
zz=Zero();
for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
U = where(mod(coor,params.domainsize)==(Integer)0,zz,U);
PokeIndex<LorentzIndex>(UmuDD,U,mu);
}
} else {
UmuDD = Umu;
}
// SU3::ColdConfiguration(RNG4,Umu);
// SU3::TepidConfiguration(RNG4,Umu);
// SU3::HotConfiguration(RNG4,Umu);
// Umu=Zero();
RealD mass=params.mq;
RealD M5=1.8;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl; std::cout<<GridLogMessage << "Building g5R5 hermitian DWF operator" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
RealD mass=0.001;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
DomainWallFermionR DdwfDD(UmuDD,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace; typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator; typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> CoarseOperator;
typedef CoarseOperator::CoarseVector CoarseVector; typedef CoarseOperator::CoarseVector CoarseVector;
typedef CoarseOperator::siteVector siteVector;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl; std::cout<<GridLogMessage << "Calling Aggregation class to build subspace" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf); MdagMLinearOperator<DomainWallFermionR,LatticeFermion> HermDefOp(Ddwf);
Subspace Aggregates(Coarse5d,FGrid,0);
// Aggregates.CreateSubspace(RNG5,HermDefOp,nbasis);
assert ( (nbasis & 0x1)==0);
int nb=nbasis/2;
std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
// Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
// Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb);
for(int n=0;n<nb;n++){
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;
}
for(int n=0;n<nbasis;n++){
std::cout<<GridLogMessage << "vec["<<n<<"] = "<<norm2(Aggregates.subspace[n]) <<std::endl;
}
// for(int i=0;i<nbasis;i++){ Subspace Aggregates(Coarse5d,FGrid,0);
// result = Aggregates.subspace[i];
// Aggregates.subspace[i]=result+g5*result; assert ( (nbasis & 0x1)==0);
// } {
result=Zero(); int nb=nbasis/2;
Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0);
for(int n=0;n<nb;n++){
G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
}
LatticeFermion A(FGrid);
LatticeFermion B(FGrid);
for(int n=0;n<nb;n++){
A = Aggregates.subspace[n];
B = Aggregates.subspace[n+nb];
Aggregates.subspace[n] = A+B; // 1+G5 // eigen value of G5R5 is +1
Aggregates.subspace[n+nb]= A-B; // 1-G5 // eigen value of G5R5 is -1
}
}
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl; std::cout<<GridLogMessage << "Building coarse representation of Indef operator" <<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
typedef CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> Level1Op;
typedef CoarsenedMatrix<siteVector,iScalar<vTComplex>,nbasisc> Level2Op;
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf); Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOp(Ddwf);
Gamma5R5HermitianLinearOperator<DomainWallFermionR,LatticeFermion> HermIndefOpDD(DdwfDD);
CoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LDOp(*Coarse5d,1); // Hermitian matrix Level1Op LDOp(*Coarse5d,1); LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
LDOp.CoarsenOperator(FGrid,HermIndefOp,Aggregates);
//////////////////////////////////////////////////
// Deflate the course space. Recursive multigrid?
//////////////////////////////////////////////////
typedef Aggregation<siteVector,iScalar<vTComplex>,nbasisc> CoarseSubspace;
CoarseSubspace CoarseAggregates(CoarseCoarse5d,Coarse5d,0);
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Testing some coarse space solvers " <<std::endl; std::cout<<GridLogMessage << "Build deflation space in coarse operator "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
CoarseVector c_src (Coarse5d);
CoarseVector c_res (Coarse5d);
gaussian(CRNG,c_src);
c_res=Zero();
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Solving posdef-CG on coarse space "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp); MdagMLinearOperator<CoarseOperator,CoarseVector> PosdefLdop(LDOp);
ConjugateGradient<CoarseVector> CG(1.0e-6,100000); {
CG(PosdefLdop,c_src,c_res); int nb=nbasisc/2;
CoarseAggregates.CreateSubspaceChebyshev(CRNG,PosdefLdop,nb,12.0,0.02,500,100,100,0.0);
for(int n=0;n<nb;n++){
auto subspace = CoarseAggregates.subspace[n].View();
auto subspace_g5 = CoarseAggregates.subspace[n+nb].View();
for(int nn=0;nn<nb;nn++){
for(int site=0;site<Coarse5d->oSites();site++){
subspace_g5[site](nn) = subspace[site](nn);
subspace_g5[site](nn+nb)=-subspace[site](nn+nb);
}
}
}
}
Level2Op L2Op(*CoarseCoarse5d,1); // Hermitian matrix
typedef Level2Op::CoarseVector CoarseCoarseVector;
HermitianLinearOperator<Level1Op,CoarseVector> L1LinOp(LDOp);
L2Op.CoarsenOperator(Coarse5d,L1LinOp,CoarseAggregates);
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Solving indef-MCR on coarse space "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
// HermitianLinearOperator<CoarseOperator,CoarseVector> HermIndefLdop(LDOp);
// ConjugateResidual<CoarseVector> MCR(1.0e-6,100000);
// MCR(HermIndefLdop,c_src,c_res);
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building deflation preconditioner "<< std::endl; std::cout<<GridLogMessage << " Running CoarseCoarse grid Lanczos "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> Precon (Aggregates, LDOp, MdagMLinearOperator<Level2Op,CoarseCoarseVector> IRLHermOpL2(L2Op);
HermIndefOp,Ddwf, Chebyshev<CoarseCoarseVector> IRLChebyL2(0.001,4.2,71);
HermIndefOp,Ddwf); FunctionHermOp<CoarseCoarseVector> IRLOpChebyL2(IRLChebyL2,IRLHermOpL2);
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
int cNk=24;
int cNm=36;
int cNstop=24;
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
// MultiGridPreconditioner <vSpinColourVector,vTComplex,nbasis,DomainWallFermionR> PreconDD(Aggregates, LDOp, int cNconv;
// HermIndefOp,Ddwf, std::vector<RealD> eval2(cNm);
// HermIndefOpDD,DdwfDD); std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
// TrivialPrecon<LatticeFermion> simple; CoarseCoarseVector cc_src(CoarseCoarse5d); cc_src=1.0;
IRLL2.calc(eval2,evec2,cc_src,cNconv);
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.1,1000);
DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(L2Op,CoarseCoarseCG,DeflCoarseCoarseGuesser);
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Testing smoother efficacy"<< std::endl; std::cout<<GridLogMessage << "Building 3 level Multigrid "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
Precon.SmootherTest(src); typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,DeflatedGuesser<CoarseVector> , NormalEquations<CoarseVector> > TwoLevelMG;
typedef MultiGridPreconditioner<siteVector,iScalar<vTComplex>,nbasisc,Level1Op, DeflatedGuesser<CoarseCoarseVector>, NormalEquations<CoarseCoarseVector> > CoarseMG;
typedef MultiGridPreconditioner<vSpinColourVector, vTComplex,nbasis, DomainWallFermionR,ZeroGuesser<CoarseVector>, LinearFunction<CoarseVector> > ThreeLevelMG;
// std::cout<<GridLogMessage << "**************************************************"<< std::endl; // MultiGrid preconditioner acting on the coarse space <-> coarsecoarse space
// std::cout<<GridLogMessage << "Testing DD smoother efficacy"<< std::endl; ChebyshevSmoother<CoarseVector, Level1Op > CoarseSmoother(0.1,12.0,3,L1LinOp,LDOp);
// std::cout<<GridLogMessage << "**************************************************"<< std::endl; ChebyshevSmoother<LatticeFermion,DomainWallFermionR> FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf);
// PreconDD.SmootherTest(src);
// std::cout<<GridLogMessage << "**************************************************"<< std::endl; // MirsSmoother<CoarseVector, Level1Op > CoarseCGSmoother(0.1,0.1,4,L1LinOp,LDOp);
// std::cout<<GridLogMessage << "Testing SAP smoother efficacy"<< std::endl; // MirsSmoother<LatticeFermion,DomainWallFermionR> FineCGSmoother(0.0,0.01,8,HermIndefOp,Ddwf);
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
// PreconDD.SAP(src,result);
// std::cout<<GridLogMessage << "**************************************************"<< std::endl; CoarseMG Level2Precon (CoarseAggregates, L2Op,
// std::cout<<GridLogMessage << "Unprec CG "<< std::endl; L1LinOp,LDOp,
// std::cout<<GridLogMessage << "**************************************************"<< std::endl; CoarseSmoother,
DeflCoarseCoarseGuesser,
DeflCoarseCoarseCGNE);
Level2Precon.Level(2);
// TrivialPrecon<LatticeFermion> simple; // PGCR Applying this solver to solve the coarse space problem
// ConjugateGradient<LatticeFermion> fCG(1.0e-8,100000); PrecGeneralisedConjugateResidual<CoarseVector> l2PGCR(0.1, 100, L1LinOp,Level2Precon,16,16);
// fCG(HermDefOp,src,result); l2PGCR.Level(2);
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ThreeLevelMG ThreeLevelPrecon(Aggregates, LDOp,
HermIndefOp,Ddwf,
FineSmoother,
CoarseZeroGuesser,
l2PGCR);
ThreeLevelPrecon.Level(1);
// Apply the fine-coarse-coarsecoarse 2 deep MG preconditioner in an outer PGCR on the fine fgrid
PrecGeneralisedConjugateResidual<LatticeFermion> l1PGCR(1.0e-8,1000,HermIndefOp,ThreeLevelPrecon,16,16);
l1PGCR.Level(1);
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Red Black Prec CG "<< std::endl; std::cout<<GridLogMessage << "Calling 3 level Multigrid "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
LatticeFermion src_o(FrbGrid);
LatticeFermion result_o(FrbGrid);
pickCheckerboard(Odd,src_o,src);
result_o=Zero();
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
ConjugateGradient<LatticeFermion> pCG(1.0e-8,10000);
// pCG(HermOpEO,src_o,result_o);
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
// std::cout<<GridLogMessage << "Testing GCR on indef matrix "<< std::endl;
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
// PrecGeneralisedConjugateResidual<LatticeFermion> UPGCR(1.0e-8,100000,simple,8,128);
// UPGCR(HermIndefOp,src,result);
/// Get themax eval
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage <<" Applying power method to find spectral range "<<std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
Precon.PowerMethod(src);
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
// std::cout<<GridLogMessage << "Building a two level DDPGCR "<< std::endl;
// std::cout<<GridLogMessage << "**************************************************"<< std::endl;
// PrecGeneralisedConjugateResidual<LatticeFermion> PGCRDD(1.0e-8,100000,PreconDD,8,128);
// result=Zero();
// std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
// PGCRDD(HermIndefOp,src,result);
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Building a two level PGCR "<< std::endl;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
PrecGeneralisedConjugateResidual<LatticeFermion> PGCR(1.0e-8,100000,Precon,8,8);
std::cout<<GridLogMessage<<"checking norm src "<<norm2(src)<<std::endl;
result=Zero(); result=Zero();
PGCR(HermIndefOp,src,result); l1PGCR(src,result);
CoarseVector c_src(Coarse5d); c_src=1.0;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << " Fine PowerMethod "<< std::endl;
PowerMethod<LatticeFermion> PM; PM(HermDefOp,src);
std::cout<<GridLogMessage << " Coarse PowerMethod "<< std::endl;
PowerMethod<CoarseVector> cPM; cPM(PosdefLdop,c_src);
std::cout<<GridLogMessage << " CoarseCoarse PowerMethod "<< std::endl;
PowerMethod<CoarseCoarseVector> ccPM; ccPM(IRLHermOpL2,cc_src);
std::cout<<GridLogMessage << "**************************************************"<< std::endl; std::cout<<GridLogMessage << "**************************************************"<< std::endl;
std::cout<<GridLogMessage << "Done "<< std::endl; std::cout<<GridLogMessage << "Done "<< std::endl;