mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 18:19:34 +01:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/paboyle/Grid into feature/gpt
This commit is contained in:
		| @@ -101,7 +101,8 @@ public: | ||||
|   virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|  | ||||
|   // Efficient support for multigrid coarsening | ||||
|   virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|  | ||||
|   void   Meooe5D       (const FermionField &in, FermionField &out); | ||||
|   void   MeooeDag5D    (const FermionField &in, FermionField &out); | ||||
|   | ||||
| @@ -62,14 +62,15 @@ public: | ||||
|  | ||||
|   // Efficient support for multigrid coarsening | ||||
|   virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Physical surface field utilities | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       //      virtual void Dminus(const FermionField &psi, FermionField &chi);     // Inherit trivial case | ||||
|       //      virtual void DminusDag(const FermionField &psi, FermionField &chi);  // Inherit trivial case | ||||
|       virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||
|       virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d); | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Physical surface field utilities | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   //      virtual void Dminus(const FermionField &psi, FermionField &chi);     // Inherit trivial case | ||||
|   //      virtual void DminusDag(const FermionField &psi, FermionField &chi);  // Inherit trivial case | ||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||
|   virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d); | ||||
|  | ||||
|   // Constructors | ||||
|   ContinuedFractionFermion5D(GaugeField &_Umu, | ||||
|   | ||||
| @@ -89,6 +89,7 @@ public: | ||||
|  | ||||
|   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's | ||||
|   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||
|  | ||||
|  | ||||
|       virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; | ||||
|   | ||||
| @@ -103,6 +103,7 @@ public: | ||||
|   // Multigrid assistance; force term uses too | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   void Mdir(const FermionField &in, FermionField &out, int dir, int disp); | ||||
|   void MdirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|   void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -86,7 +86,8 @@ public: | ||||
|   void   MooeeDag    (const FermionField &in, FermionField &out); | ||||
|   void   MooeeInvDag (const FermionField &in, FermionField &out); | ||||
|  | ||||
|   void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   void Mdir   (const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   void MdirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|   void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); | ||||
|  | ||||
|   // These can be overridden by fancy 5d chiral action | ||||
|   | ||||
| @@ -67,12 +67,13 @@ public: | ||||
|  | ||||
|   // Efficient support for multigrid coarsening | ||||
|   virtual void  Mdir (const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       // Physical surface field utilities | ||||
|       /////////////////////////////////////////////////////////////// | ||||
|       virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||
|       virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d); | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Physical surface field utilities | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); | ||||
|   virtual void ImportPhysicalFermionSource  (const FermionField &input4d,FermionField &imported5d); | ||||
|  | ||||
|   // Constructors | ||||
|   PartialFractionFermion5D(GaugeField &_Umu, | ||||
|   | ||||
| @@ -115,9 +115,10 @@ public: | ||||
|   // Multigrid assistance; force term uses too | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   void Mdir(const FermionField &in, FermionField &out, int dir, int disp); | ||||
|   void MdirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|   void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); | ||||
|   void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp, | ||||
|                    int gamma, int dag); | ||||
|   void DhopDirAll(const FermionField &in, std::vector<FermionField> &out); | ||||
|   void DhopDirCalc(const FermionField &in, FermionField &out, int dirdisp,int gamma, int dag); | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Extra methods added by derived | ||||
|   | ||||
| @@ -111,15 +111,16 @@ public: | ||||
|   virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);}; | ||||
|   virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; | ||||
|   virtual void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||
|   virtual void   MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||
|  | ||||
|   // These can be overridden by fancy 5d chiral action | ||||
|   virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|   virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|   virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); | ||||
|  | ||||
|       void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; | ||||
|       void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; | ||||
|       void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; | ||||
|   void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; | ||||
|   void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; | ||||
|   void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ; | ||||
|  | ||||
|   // Implement hopping term non-hermitian hopping term; half cb or both | ||||
|   // Implement s-diagonal DW | ||||
| @@ -131,6 +132,9 @@ public: | ||||
|   // add a DhopComm | ||||
|   // -- suboptimal interface will presently trigger multiple comms. | ||||
|   void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); | ||||
|   void DhopDirAll(const FermionField &in,std::vector<FermionField> &out); | ||||
|   void DhopDirComms(const FermionField &in); | ||||
|   void DhopDirCalc(const FermionField &in, FermionField &out,int point); | ||||
|      | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // New methods added  | ||||
|   | ||||
| @@ -60,6 +60,9 @@ public: | ||||
| 			    int Ls, int Nsite, const FermionField &in, FermionField &out, | ||||
| 			    int interior=1,int exterior=1) ; | ||||
|  | ||||
|   static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, | ||||
| 			  int Nsite, const FermionField &in, std::vector<FermionField> &out) ; | ||||
|  | ||||
|   static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, | ||||
| 			    int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); | ||||
|  | ||||
| @@ -100,8 +103,17 @@ public: | ||||
|  | ||||
| private: | ||||
|  | ||||
|   static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, | ||||
|   static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, | ||||
| 				   int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); | ||||
|  | ||||
|   static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirTp(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|   static accelerator_inline void DhopDirTm(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp); | ||||
|        | ||||
|   // Specialised variants | ||||
|   static accelerator void GenericDhopSite(StencilView &st,  DoubledGaugeFieldView &U, SiteHalfSpinor * buf, | ||||
|   | ||||
| @@ -54,6 +54,14 @@ public: | ||||
|     _Mat.Mdir(in,tmp,dir,disp); | ||||
|     G5R5(out,tmp); | ||||
|   } | ||||
|   void OpDirAll(const Field &in, std::vector<Field> &out) { | ||||
|     Field tmp(in.Grid()); | ||||
|     _Mat.MdirAll(in,out); | ||||
|     for(int p=0;p<out.size();p++) { | ||||
|       tmp=out[p]; | ||||
|       G5R5(out[p],tmp); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
|  | ||||
| @@ -96,6 +104,12 @@ public: | ||||
|     _Mat.Mdir(in,tmp,dir,disp); | ||||
|     out=g5*tmp; | ||||
|   } | ||||
|   void OpDirAll(const Field &in, std::vector<Field> &out) { | ||||
|     _Mat.MdirAll(in,out); | ||||
|     for(int p=0;p<out.size();p++) { | ||||
|       out[p]=g5*out[p]; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ | ||||
|  | ||||
|   | ||||
| @@ -389,6 +389,14 @@ void  CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,in | ||||
|   Meo5D(psi,tmp); | ||||
|   this->DhopDir(tmp,chi,dir,disp); | ||||
| } | ||||
| template<class Impl> | ||||
| void  CayleyFermion5D<Impl>::MdirAll(const FermionField &psi, std::vector<FermionField> &out) | ||||
| { | ||||
|   FermionField tmp(psi.Grid()); | ||||
|   Meo5D(psi,tmp); | ||||
|   this->DhopDirAll(tmp,out); | ||||
| } | ||||
|  | ||||
| // force terms; five routines; default to Dhop on diagonal | ||||
| template<class Impl> | ||||
| void CayleyFermion5D<Impl>::MDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) | ||||
|   | ||||
| @@ -143,6 +143,25 @@ void  ContinuedFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFi | ||||
|   } | ||||
| } | ||||
| template<class Impl> | ||||
| void  ContinuedFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi) | ||||
| { | ||||
|   int Ls = this->Ls; | ||||
|  | ||||
|   this->DhopDirAll(psi,chi); // Dslash on diagonal. g5 Dslash is hermitian | ||||
|  | ||||
|   for(int p=0;p<chi.size();p++){ | ||||
|     int sign=1; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       if ( s==(Ls-1) ){ | ||||
| 	ag5xpby_ssp(chi[p],Beta[s]*ZoloHiInv,chi[p],0.0,chi[p],s,s); | ||||
|       } else { | ||||
| 	ag5xpby_ssp(chi[p],cc[s]*Beta[s]*sign*ZoloHiInv,chi[p],0.0,chi[p],s,s); | ||||
|       } | ||||
|       sign=-sign;  | ||||
|     } | ||||
|   } | ||||
| } | ||||
| template<class Impl> | ||||
| void   ContinuedFractionFermion5D<Impl>::Meooe       (const FermionField &psi, FermionField &chi) | ||||
| { | ||||
|   int Ls = this->Ls; | ||||
|   | ||||
| @@ -538,10 +538,16 @@ void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void) | ||||
| // Implement the general interface. Here we use SAME mass on all slices | ||||
| ///////////////////////////////////////////////////////////////////////// | ||||
| template <class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { | ||||
| void ImprovedStaggeredFermion5D<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)  | ||||
| { | ||||
|   DhopDir(in, out, dir, disp); | ||||
| } | ||||
| template <class Impl> | ||||
| void ImprovedStaggeredFermion5D<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)  | ||||
| { | ||||
|   assert(0); | ||||
| } | ||||
| template <class Impl> | ||||
| RealD ImprovedStaggeredFermion5D<Impl>::M(const FermionField &in, FermionField &out) { | ||||
|   out.Checkerboard() = in.Checkerboard(); | ||||
|   Dhop(in, out, DaggerNo); | ||||
|   | ||||
| @@ -362,12 +362,19 @@ void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { | ||||
| void ImprovedStaggeredFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)  | ||||
| { | ||||
|   DhopDir(in, out, dir, disp); | ||||
| } | ||||
| template <class Impl> | ||||
| void ImprovedStaggeredFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)  | ||||
| { | ||||
|   assert(0); // Not implemented yet | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { | ||||
| void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)  | ||||
| { | ||||
|  | ||||
|   Compressor compressor; | ||||
|   Stencil.HaloExchange(in, compressor); | ||||
| @@ -380,6 +387,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel | ||||
|   }); | ||||
| }; | ||||
|  | ||||
|  | ||||
| template <class Impl> | ||||
| void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, | ||||
| 						  DoubledGaugeField &U, | ||||
| @@ -404,7 +412,6 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st | ||||
| #ifdef GRID_OMP | ||||
|   Compressor compressor;  | ||||
|   int len =  U.Grid()->oSites(); | ||||
|   const int LLs =  1; | ||||
|  | ||||
|   DhopTotalTime   -= usecond(); | ||||
|  | ||||
|   | ||||
| @@ -31,7 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
| template<class Impl> | ||||
|  template<class Impl> | ||||
| void  PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ | ||||
|   // this does both dag and undag but is trivial; make a common helper routing | ||||
|   int Ls = this->Ls; | ||||
| @@ -45,8 +45,25 @@ void  PartialFractionFermion5D<Impl>::Mdir (const FermionField &psi, FermionFiel | ||||
|     ag5xpby_ssp(chi, scale,chi,0.0,chi,s+1,s+1);  | ||||
|   } | ||||
|   ag5xpby_ssp(chi,p[nblock]*scale/amax,chi,0.0,chi,Ls-1,Ls-1); | ||||
|  | ||||
| } | ||||
| template<class Impl> | ||||
| void  PartialFractionFermion5D<Impl>::MdirAll (const FermionField &psi, std::vector<FermionField> &chi){ | ||||
|   // this does both dag and undag but is trivial; make a common helper routing | ||||
|   int Ls = this->Ls; | ||||
|  | ||||
|   this->DhopDirAll(psi,chi); | ||||
|  | ||||
|   for(int point=0;point<chi.size();point++){ | ||||
|     int nblock=(Ls-1)/2; | ||||
|     for(int b=0;b<nblock;b++){ | ||||
|       int s = 2*b; | ||||
|       ag5xpby_ssp(chi[point],-scale,chi[point],0.0,chi[point],s,s);  | ||||
|       ag5xpby_ssp(chi[point], scale,chi[point],0.0,chi[point],s+1,s+1);  | ||||
|     } | ||||
|     ag5xpby_ssp(chi[point],p[nblock]*scale/amax,chi[point],0.0,chi[point],Ls-1,Ls-1); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template<class Impl> | ||||
| void   PartialFractionFermion5D<Impl>::Meooe_internal(const FermionField &psi, FermionField &chi,int dag) | ||||
| { | ||||
|   | ||||
| @@ -241,6 +241,15 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in | ||||
|   Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma); | ||||
|  | ||||
| }; | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out) | ||||
| { | ||||
|   Compressor compressor(DaggerNo); | ||||
|   Stencil.HaloExchange(in,compressor); | ||||
|   uint64_t Nsite = Umu.Grid()->oSites(); | ||||
|   Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out); | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<class Impl> | ||||
| void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, | ||||
|   | ||||
| @@ -319,28 +319,51 @@ void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int d | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp) { | ||||
| void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int dir, int disp)  | ||||
| { | ||||
|   DhopDir(in, out, dir, disp); | ||||
| } | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::MdirAll(const FermionField &in, std::vector<FermionField> &out)  | ||||
| { | ||||
|   DhopDirAll(in, out); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)  | ||||
| { | ||||
|   Compressor compressor(DaggerNo); | ||||
|   Stencil.HaloExchange(in, compressor); | ||||
|  | ||||
|   int skip = (disp == 1) ? 0 : 1; | ||||
|   int dirdisp = dir + skip * 4; | ||||
|   int gamma = dir + (1 - skip) * 4; | ||||
|  | ||||
|   DhopDirDisp(in, out, dirdisp, gamma, DaggerNo); | ||||
|   DhopDirCalc(in, out, dirdisp, gamma, DaggerNo); | ||||
| }; | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)  | ||||
| void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)  | ||||
| { | ||||
|   Compressor compressor(dag); | ||||
|  | ||||
|   Compressor compressor(DaggerNo); | ||||
|   Stencil.HaloExchange(in, compressor); | ||||
|  | ||||
|   assert((out.size()==8)||(out.size()==9));  | ||||
|   for(int dir=0;dir<Nd;dir++){ | ||||
|     for(int disp=-1;disp<=1;disp+=2){ | ||||
|  | ||||
|       int skip = (disp == 1) ? 0 : 1; | ||||
|       int dirdisp = dir + skip * 4; | ||||
|       int gamma = dir + (1 - skip) * 4; | ||||
|  | ||||
|       DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo); | ||||
|     } | ||||
|   } | ||||
| } | ||||
| template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopDirCalc(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag)  | ||||
| { | ||||
|   int Ls=1; | ||||
|   int Nsite=in.oSites(); | ||||
|   uint64_t Nsite=in.oSites(); | ||||
|   Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma); | ||||
| }; | ||||
|  | ||||
| @@ -348,7 +371,8 @@ template <class Impl> | ||||
| void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, | ||||
|                                        DoubledGaugeField &U, | ||||
|                                        const FermionField &in, | ||||
|                                        FermionField &out, int dag) { | ||||
|                                        FermionField &out, int dag)  | ||||
| { | ||||
| #ifdef GRID_OMP | ||||
|   if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) | ||||
|     DhopInternalOverlappedComms(st,lo,U,in,out,dag); | ||||
|   | ||||
| @@ -91,8 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) | ||||
|   }								\ | ||||
|   synchronise();						 | ||||
|  | ||||
| #define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon)			\ | ||||
|   if (gamma == Dir) {						\ | ||||
| #define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon)		\ | ||||
|     if (SE->_is_local ) {					\ | ||||
|       int perm= SE->_permute;					\ | ||||
|       auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	\ | ||||
| @@ -102,10 +101,14 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) | ||||
|     }								\ | ||||
|     synchronise();						\ | ||||
|     Impl::multLink(Uchi, U[sU], chi, dir, SE, st);		\ | ||||
|     Recon(result, Uchi);					\ | ||||
|     synchronise();						\ | ||||
|     Recon(result, Uchi);					 | ||||
|  | ||||
| #define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon)			\ | ||||
|   if (gamma == Dir) {						\ | ||||
|     GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon);			\ | ||||
|   } | ||||
|  | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////// | ||||
|   // All legs kernels ; comms then compute | ||||
|   //////////////////////////////////////////////////////////////////// | ||||
| @@ -284,7 +287,36 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st,  DoubledGaugeField | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <class Impl> | ||||
| #define DhopDirMacro(Dir,spProj,spRecon)	\ | ||||
|   template <class Impl>							\ | ||||
|   void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \ | ||||
| 					 int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \ | ||||
|   {									\ | ||||
|   typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;		\ | ||||
|   typedef decltype(coalescedRead(in[0]))  calcSpinor;			\ | ||||
|   calcHalfSpinor chi;							\ | ||||
|   calcSpinor result;							\ | ||||
|   calcHalfSpinor Uchi;							\ | ||||
|   StencilEntry *SE;							\ | ||||
|   int ptype;								\ | ||||
|   const int Nsimd = SiteHalfSpinor::Nsimd();				\ | ||||
|   const int lane=SIMTlane(Nsimd);					\ | ||||
| 									\ | ||||
|   SE = st.GetEntry(ptype, dir, sF);					\ | ||||
|   GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon);				\ | ||||
|   coalescedWrite(out[sF], result,lane);					\ | ||||
|   }									 | ||||
|  | ||||
| DhopDirMacro(Xp,spProjXp,spReconXp); | ||||
| DhopDirMacro(Yp,spProjYp,spReconYp); | ||||
| DhopDirMacro(Zp,spProjZp,spReconZp); | ||||
| DhopDirMacro(Tp,spProjTp,spReconTp); | ||||
| DhopDirMacro(Xm,spProjXm,spReconXm); | ||||
| DhopDirMacro(Ym,spProjYm,spReconYm); | ||||
| DhopDirMacro(Zm,spProjZm,spReconZm); | ||||
| DhopDirMacro(Tm,spProjTm,spReconTm); | ||||
|  | ||||
| template <class Impl>  | ||||
| void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, | ||||
| 				    int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)  | ||||
| { | ||||
| @@ -299,18 +331,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si | ||||
|   const int lane=SIMTlane(Nsimd); | ||||
|  | ||||
|   SE = st.GetEntry(ptype, dir, sF); | ||||
|   if (gamma == Xp) {						 | ||||
|     if (SE->_is_local ) {					 | ||||
|       int perm= SE->_permute;					 | ||||
|       auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	 | ||||
|       spProjXp(chi,tmp);						 | ||||
|     } else {							 | ||||
|       chi = coalescedRead(buf[SE->_offset],lane);			 | ||||
|     }								 | ||||
|     Impl::multLink(Uchi, U[sU], chi, dir, SE, st);		 | ||||
|     spReconXp(result, Uchi);					 | ||||
|   } | ||||
|  | ||||
|   GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp); | ||||
|   GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp); | ||||
|   GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp); | ||||
|   GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp); | ||||
| @@ -321,6 +342,38 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si | ||||
|   coalescedWrite(out[sF], result,lane); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, | ||||
| 				      int Nsite, const FermionField &in, std::vector<FermionField> &out)  | ||||
| { | ||||
|    auto U_v   = U.View(); | ||||
|    auto in_v  = in.View(); | ||||
|    auto st_v  = st.View(); | ||||
|  | ||||
|    auto out_Xm = out[0].View(); | ||||
|    auto out_Ym = out[1].View(); | ||||
|    auto out_Zm = out[2].View(); | ||||
|    auto out_Tm = out[3].View(); | ||||
|    auto out_Xp = out[4].View(); | ||||
|    auto out_Yp = out[5].View(); | ||||
|    auto out_Zp = out[6].View(); | ||||
|    auto out_Tp = out[7].View(); | ||||
|  | ||||
|    accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{ | ||||
|       int sU=sss/Ls;				 | ||||
|       int sF =sss;				 | ||||
|       DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0); | ||||
|       DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1); | ||||
|       DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2); | ||||
|       DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3); | ||||
|       DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4); | ||||
|       DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5); | ||||
|       DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6); | ||||
|       DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7); | ||||
|    }); | ||||
| } | ||||
|  | ||||
|  | ||||
| template <class Impl> | ||||
| void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, | ||||
| 					 int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)  | ||||
| @@ -332,13 +385,32 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S | ||||
|    auto in_v  = in.View(); | ||||
|    auto out_v = out.View(); | ||||
|    auto st_v  = st.View(); | ||||
|    accelerator_for(ss,Nsite,Simd::Nsimd(),{ | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       int sU=ss; | ||||
|       int sF = s+Ls*sU;  | ||||
|       DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma); | ||||
|     } | ||||
|   }); | ||||
| #define LoopBody(Dir)				\ | ||||
|    case Dir :			\ | ||||
|      accelerator_forNB(ss,Nsite,Simd::Nsimd(),{	\ | ||||
|        for(int s=0;s<Ls;s++){			\ | ||||
| 	 int sU=ss;				\ | ||||
| 	 int sF = s+Ls*sU;						\ | ||||
| 	 DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\ | ||||
|        }							       \ | ||||
|        });							       \ | ||||
|      break; | ||||
|  | ||||
|    switch(gamma){ | ||||
|    LoopBody(Xp); | ||||
|    LoopBody(Yp); | ||||
|    LoopBody(Zp); | ||||
|    LoopBody(Tp); | ||||
|  | ||||
|    LoopBody(Xm); | ||||
|    LoopBody(Ym); | ||||
|    LoopBody(Zm); | ||||
|    LoopBody(Tm); | ||||
|    default: | ||||
|      assert(0); | ||||
|      break; | ||||
|    } | ||||
| #undef LoopBody | ||||
| }  | ||||
|  | ||||
| #define KERNEL_CALLNB(A) \ | ||||
|   | ||||
| @@ -92,6 +92,7 @@ public: | ||||
|   }; | ||||
|  | ||||
|   void Mdir(const GaugeField&, GaugeField&, int, int){ assert(0);} | ||||
|   void MdirAll(const GaugeField&, std::vector<GaugeField> &){ assert(0);} | ||||
|   void Mdiag(const GaugeField&, GaugeField&){ assert(0);} | ||||
|  | ||||
|   void ImportGauge(const GaugeField& _U) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user