diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index 450b76df..ba71faf8 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -35,14 +35,13 @@ Author: paboyle NAMESPACE_BEGIN(Grid); class Geometry { - // int dimension; public: int npoint; std::vector directions ; std::vector displacements; Geometry(int _d) { - + int base = (_d==5) ? 1:0; // make coarse grid stencil for 4d , not 5d @@ -187,9 +186,10 @@ public: } } - // - // World of possibilities here. - // + //////////////////////////////////////////////////////////////////////////////////////////////// + // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit) + // and this is the best I found + //////////////////////////////////////////////////////////////////////////////////////////////// virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase &hermop, int nn, double hi, @@ -249,11 +249,18 @@ public: hermop.HermOp(*Tn,y); - y=xscale*y+mscale*(*Tn); - - *Tnp=2.0*y-(*Tnm); + auto y_v = y.View(); + auto Tn_v = Tn->View(); + auto Tnp_v = Tnp->View(); + auto Tnm_v = Tnm->View(); + accelerator_forNB(ss, FineGrid->oSites(), Nsimd, { + coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); + coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); + }); - if ( (n%orderstep)==0 ) { + // Possible more fine grained control is needed than a linear sweep, + // but huge productivity gain if this is simple algorithm and not a tunable + if ( (n%orderstep)==0 ) { Mn=*Tnp; scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; subspace[b] = Mn; @@ -270,6 +277,7 @@ public: } } + assert(b==nn); } }; @@ -393,42 +401,15 @@ public: return norm2(out); } }; - - void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ - - conformable(_grid,in.Grid()); - conformable(in.Grid(),out.Grid()); - + void MdirComms(const CoarseVector &in) + { SimpleCompressor compressor; - Stencil.HaloExchange(in,compressor); - - int ndim = in.Grid()->Nd(); - - ////////////// - // 4D action like wilson - // 0+ => 0 - // 0- => 1 - // 1+ => 2 - // 1- => 3 - // etc.. - ////////////// - // 5D action like DWF - // 1+ => 0 - // 1- => 1 - // 2+ => 2 - // 2- => 3 - // etc.. - - auto point = [dir, disp, ndim](){ - if(dir == 0 and disp == 0) - return 8; - else if ( ndim==4 ) { - return (4 * dir + 1 - disp) / 2; - } else { - return (4 * (dir-1) + 1 - disp) / 2; - } - }(); + } + void MdirCalc(const CoarseVector &in, CoarseVector &out, int point) + { + conformable(_grid,in.Grid()); + conformable(_grid,out.Grid()); typedef LatticeView Aview; Vector AcceleratorViewContainer; @@ -458,10 +439,54 @@ public: out_v[ss]=res; }); + } + void MdirAll(const CoarseVector &in,std::vector &out) + { + this->MdirComms(in); + int ndir=geom.npoint-1; + assert(out.size()==ndir); + for(int p=0;pMdirComms(in); + + int ndim = in.Grid()->Nd(); + + ////////////// + // 4D action like wilson + // 0+ => 0 + // 0- => 1 + // 1+ => 2 + // 1- => 3 + // etc.. + ////////////// + // 5D action like DWF + // 1+ => 0 + // 1- => 1 + // 2+ => 2 + // 2- => 3 + // etc.. + auto point = [dir, disp, ndim](){ + if(dir == 0 and disp == 0) + return 8; + else if ( ndim==4 ) { + return (4 * dir + 1 - disp) / 2; + } else { + return (4 * (dir-1) + 1 - disp) / 2; + } + }(); + + MdirCalc(in,out,point); + }; - void Mdiag(const CoarseVector &in, CoarseVector &out){ - Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil + void Mdiag(const CoarseVector &in, CoarseVector &out) + { + int point=geom.npoint-1; + MdirCalc(in, out, point); // No comms }; @@ -511,16 +536,12 @@ public: for(int i=0;i &out) = 0; // Abstract base virtual void Op (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base @@ -83,6 +84,9 @@ public: void OpDir (const Field &in, Field &out,int dir,int disp) { _Mat.Mdir(in,out,dir,disp); } + void OpDirAll (const Field &in, std::vector &out){ + _Mat.MdirAll(in,out); + }; void Op (const Field &in, Field &out){ _Mat.M(in,out); } @@ -116,6 +120,9 @@ public: _Mat.Mdir(in,out,dir,disp); assert(0); } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; void Op (const Field &in, Field &out){ _Mat.M(in,out); assert(0); @@ -154,6 +161,9 @@ public: void OpDir (const Field &in, Field &out,int dir,int disp) { _Mat.Mdir(in,out,dir,disp); } + void OpDirAll (const Field &in, std::vector &out){ + _Mat.MdirAll(in,out); + }; void Op (const Field &in, Field &out){ _Mat.M(in,out); } @@ -183,6 +193,9 @@ public: void OpDir (const Field &in, Field &out,int dir,int disp) { _Mat.Mdir(in,out,dir,disp); } + void OpDirAll (const Field &in, std::vector &out){ + _Mat.MdirAll(in,out); + }; void Op (const Field &in, Field &out){ _Mat.M(in,out); } @@ -234,6 +247,9 @@ public: void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; }; template class SchurDiagMooeeOperator : public SchurOperatorBase { diff --git a/Grid/algorithms/SparseMatrix.h b/Grid/algorithms/SparseMatrix.h index ffed7527..fd713e9f 100644 --- a/Grid/algorithms/SparseMatrix.h +++ b/Grid/algorithms/SparseMatrix.h @@ -47,6 +47,7 @@ public: } virtual void Mdiag (const Field &in, Field &out)=0; virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; + virtual void MdirAll (const Field &in, std::vector &out)=0; }; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -56,12 +57,12 @@ template class CheckerBoardedSparseMatrixBase : public SparseMatrix public: virtual GridBase *RedBlackGrid(void)=0; - ////////////////////////////////////////////////////////////////////// - // Query the even even properties to make algorithmic decisions - ////////////////////////////////////////////////////////////////////// - virtual RealD Mass(void) { return 0.0; }; - virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden - virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better + ////////////////////////////////////////////////////////////////////// + // Query the even even properties to make algorithmic decisions + ////////////////////////////////////////////////////////////////////// + virtual RealD Mass(void) { return 0.0; }; + virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden + virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better // half checkerboard operaions virtual void Meooe (const Field &in, Field &out)=0; diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index 333ba49b..c2ccb98b 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -101,7 +101,8 @@ public: virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); // Efficient support for multigrid coarsening - virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + virtual void MdirAll(const FermionField &in, std::vector &out); void Meooe5D (const FermionField &in, FermionField &out); void MeooeDag5D (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h index 379c5f8f..5aa7bfbd 100644 --- a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -62,14 +62,15 @@ public: // Efficient support for multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + virtual void MdirAll(const FermionField &in, std::vector &out); - /////////////////////////////////////////////////////////////// - // Physical surface field utilities - /////////////////////////////////////////////////////////////// - // virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case - // virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case - virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); - virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + // virtual void Dminus(const FermionField &psi, FermionField &chi); // Inherit trivial case + // virtual void DminusDag(const FermionField &psi, FermionField &chi); // Inherit trivial case + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); // Constructors ContinuedFractionFermion5D(GaugeField &_Umu, diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index c60a2e84..cbc6ca63 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -89,6 +89,7 @@ public: virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + virtual void MdirAll(const FermionField &in, std::vector &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { assert(0);}; diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h index b4d8d60b..5cb95ca6 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -103,6 +103,7 @@ public: // Multigrid assistance; force term uses too /////////////////////////////////////////////////////////////// void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void MdirAll(const FermionField &in, std::vector &out); void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); /////////////////////////////////////////////////////////////// diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h index b10c0356..8e3d4be5 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -86,7 +86,8 @@ public: void MooeeDag (const FermionField &in, FermionField &out); void MooeeInvDag (const FermionField &in, FermionField &out); - void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + void MdirAll(const FermionField &in, std::vector &out); void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); // These can be overridden by fancy 5d chiral action diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.h b/Grid/qcd/action/fermion/PartialFractionFermion5D.h index d61515f0..928abd3f 100644 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.h +++ b/Grid/qcd/action/fermion/PartialFractionFermion5D.h @@ -67,12 +67,13 @@ public: // Efficient support for multigrid coarsening virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + virtual void MdirAll(const FermionField &in, std::vector &out); - /////////////////////////////////////////////////////////////// - // Physical surface field utilities - /////////////////////////////////////////////////////////////// - virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); - virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); // Constructors PartialFractionFermion5D(GaugeField &_Umu, diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h index 3a712435..a3f5d2d7 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.h +++ b/Grid/qcd/action/fermion/WilsonFermion.h @@ -115,9 +115,10 @@ public: // Multigrid assistance; force term uses too /////////////////////////////////////////////////////////////// void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void MdirAll(const FermionField &in, std::vector &out); void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); - void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp, - int gamma, int dag); + void DhopDirAll(const FermionField &in, std::vector &out); + void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag); /////////////////////////////////////////////////////////////// // Extra methods added by derived diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 8f1073db..58b54421 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -111,15 +111,16 @@ public: virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);}; virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + virtual void MdirAll(const FermionField &in, std::vector &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac // These can be overridden by fancy 5d chiral action virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; - void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; - void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; + void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; + void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; + void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; // Implement hopping term non-hermitian hopping term; half cb or both // Implement s-diagonal DW @@ -131,6 +132,9 @@ public: // add a DhopComm // -- suboptimal interface will presently trigger multiple comms. void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); + void DhopDirAll(const FermionField &in,std::vector &out); + void DhopDirComms(const FermionField &in); + void DhopDirCalc(const FermionField &in, FermionField &out,int point); /////////////////////////////////////////////////////////////// // New methods added diff --git a/Grid/qcd/action/fermion/g5HermitianLinop.h b/Grid/qcd/action/fermion/g5HermitianLinop.h index 2e417ceb..90bb6d59 100644 --- a/Grid/qcd/action/fermion/g5HermitianLinop.h +++ b/Grid/qcd/action/fermion/g5HermitianLinop.h @@ -54,6 +54,14 @@ public: _Mat.Mdir(in,tmp,dir,disp); G5R5(out,tmp); } + void OpDirAll(const Field &in, std::vector &out) { + Field tmp(in.Grid()); + _Mat.MdirAll(in,out); + for(int p=0;p &out) { + _Mat.MdirAll(in,out); + for(int p=0;p::Mdir (const FermionField &psi, FermionField &chi,in Meo5D(psi,tmp); this->DhopDir(tmp,chi,dir,disp); } +template +void CayleyFermion5D::MdirAll(const FermionField &psi, std::vector &out) +{ + FermionField tmp(psi.Grid()); + Meo5D(psi,tmp); + this->DhopDirAll(tmp,out); +} + // force terms; five routines; default to Dhop on diagonal template void CayleyFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) diff --git a/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h index 7af02263..beeb3e00 100644 --- a/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h @@ -143,6 +143,25 @@ void ContinuedFractionFermion5D::Mdir (const FermionField &psi, FermionFi } } template +void ContinuedFractionFermion5D::MdirAll (const FermionField &psi, std::vector &chi) +{ + int Ls = this->Ls; + + this->DhopDirAll(psi,chi); // Dslash on diagonal. g5 Dslash is hermitian + + for(int p=0;p void ContinuedFractionFermion5D::Meooe (const FermionField &psi, FermionField &chi) { int Ls = this->Ls; diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index c42e896f..fdaa2f71 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -538,10 +538,16 @@ void ImprovedStaggeredFermion5D::ZeroCounters(void) // Implement the general interface. Here we use SAME mass on all slices ///////////////////////////////////////////////////////////////////////// template -void ImprovedStaggeredFermion5D::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { +void ImprovedStaggeredFermion5D::Mdir(const FermionField &in, FermionField &out, int dir, int disp) +{ DhopDir(in, out, dir, disp); } template +void ImprovedStaggeredFermion5D::MdirAll(const FermionField &in, std::vector &out) +{ + assert(0); +} +template RealD ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index e2605d81..b4359879 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -362,12 +362,19 @@ void ImprovedStaggeredFermion::DhopEO(const FermionField &in, FermionField } template -void ImprovedStaggeredFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { +void ImprovedStaggeredFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) +{ DhopDir(in, out, dir, disp); } +template +void ImprovedStaggeredFermion::MdirAll(const FermionField &in, std::vector &out) +{ + assert(0); // Not implemented yet +} template -void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { +void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) +{ Compressor compressor; Stencil.HaloExchange(in, compressor); @@ -380,6 +387,7 @@ void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionFiel }); }; + template void ImprovedStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, diff --git a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h index 9f8f91ad..edc674cc 100644 --- a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h @@ -31,7 +31,7 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); -template + template void PartialFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ // this does both dag and undag but is trivial; make a common helper routing int Ls = this->Ls; @@ -45,8 +45,25 @@ void PartialFractionFermion5D::Mdir (const FermionField &psi, FermionFiel ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1); } ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1); - } +template +void PartialFractionFermion5D::MdirAll (const FermionField &psi, std::vector &chi){ + // this does both dag and undag but is trivial; make a common helper routing + int Ls = this->Ls; + + this->DhopDirAll(psi,chi); + + for(int point=0;point void PartialFractionFermion5D::Meooe_internal(const FermionField &psi, FermionField &chi,int dag) { diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 1bdc9a64..a2092960 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -241,6 +241,30 @@ void WilsonFermion5D::DhopDir(const FermionField &in, FermionField &out,in Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma); }; +template +void WilsonFermion5D::DhopDirAll(const FermionField &in, std::vector &out) +{ + Compressor compressor(DaggerNo); + Stencil.HaloExchange(in,compressor); + + assert( (out.size()==8)||(out.size()==9)); + for(int dir5=1;dir5<=4;dir5++) { + for(int disp=-1;disp<=1;disp+=2){ + int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil + // we drop off the innermost fifth dimension + assert( (disp==1)||(disp==-1) ); + assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; + + int skip = (disp==1) ? 0 : 1; + int dirdisp = dir+skip*4; + int gamma = dir+(1-skip)*4; + + uint64_t Nsite = Umu.Grid()->oSites(); + Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out[dirdisp],dirdisp,gamma); + } + } +}; + template void WilsonFermion5D::DerivInternal(StencilImpl & st, diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 756bdbf4..76b904e9 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -319,28 +319,51 @@ void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int d } template -void WilsonFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { +void WilsonFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) +{ DhopDir(in, out, dir, disp); } +template +void WilsonFermion::MdirAll(const FermionField &in, std::vector &out) +{ + DhopDirAll(in, out); +} template void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { + Compressor compressor(DaggerNo); + Stencil.HaloExchange(in, compressor); + int skip = (disp == 1) ? 0 : 1; int dirdisp = dir + skip * 4; int gamma = dir + (1 - skip) * 4; - DhopDirDisp(in, out, dirdisp, gamma, DaggerNo); + DhopDirCalc(in, out, dirdisp, gamma, DaggerNo); }; - template -void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) +void WilsonFermion::DhopDirAll(const FermionField &in, std::vector &out) { - Compressor compressor(dag); - + Compressor compressor(DaggerNo); Stencil.HaloExchange(in, compressor); + + assert((out.size()==8)||(out.size()==9)); + for(int dir=0;dir +void WilsonFermion::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) +{ int Ls=1; - int Nsite=in.oSites(); + uint64_t Nsite=in.oSites(); Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma); }; @@ -348,7 +371,8 @@ template void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, - FermionField &out, int dag) { + FermionField &out, int dag) +{ #ifdef GRID_OMP if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,in,out,dag); diff --git a/Grid/qcd/utils/CovariantLaplacian.h b/Grid/qcd/utils/CovariantLaplacian.h index 0e0620a7..94322507 100644 --- a/Grid/qcd/utils/CovariantLaplacian.h +++ b/Grid/qcd/utils/CovariantLaplacian.h @@ -92,6 +92,7 @@ public: }; void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);} + void MdirAll(const GaugeField&, std::vector &){ assert(0);} void Mdiag(const GaugeField&, GaugeField&){ assert(0);} void ImportGauge(const GaugeField& _U) {