1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-25 13:15:55 +01:00

Added hand-unrolled kernel implementations of all the other dslash precision / comms precision combinations with G-parity

This commit is contained in:
Christopher Kelly 2017-08-23 14:44:40 -04:00
parent 46f88e6d72
commit a0bb8e5b46

View File

@@ -786,120 +786,113 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
const FermionField &in, \ const FermionField &in, \
FermionField &out){ assert(0); } \ FermionField &out){ assert(0); } \
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
//HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
#define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
template<> void int ss,int sU,const FermionField &in, FermionField &out) \
WilsonKernels<GparityWilsonImplD>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, { \
int ss,int sU,const FermionField &in, FermionField &out) typedef IMPL Impl; \
{ typedef typename Simd::scalar_type S; \
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... typedef typename Simd::vector_type V; \
typedef GparityWilsonImplD Impl; \
typedef typename Simd::scalar_type S; HAND_DECLARATIONS(ignore); \
typedef typename Simd::vector_type V; \
int offset,local,perm, ptype; \
HAND_DECLARATIONS(ignore); StencilEntry *SE; \
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
int offset,local,perm, ptype; HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
StencilEntry *SE; } \
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); template<> \
} void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> { \
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, typedef IMPL Impl; \
int ss,int sU,const FermionField &in, FermionField &out) typedef typename Simd::scalar_type S; \
{ typedef typename Simd::vector_type V; \
typedef GparityWilsonImplD Impl; \
typedef typename Simd::scalar_type S; HAND_DECLARATIONS(ignore); \
typedef typename Simd::vector_type V; \
StencilEntry *SE; \
HAND_DECLARATIONS(ignore); int offset,local,perm, ptype; \
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
StencilEntry *SE; HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
int offset,local,perm, ptype; } \
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); template<> void \
} WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
template<> void { \
WilsonKernels<GparityWilsonImplD>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, typedef IMPL Impl; \
int ss,int sU,const FermionField &in, FermionField &out) typedef typename Simd::scalar_type S; \
{ typedef typename Simd::vector_type V; \
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... \
typedef GparityWilsonImplD Impl; HAND_DECLARATIONS(ignore); \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; int offset,local,perm, ptype; \
StencilEntry *SE; \
HAND_DECLARATIONS(ignore); HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
int offset,local,perm, ptype; } \
StencilEntry *SE; \
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); template<> \
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
} int ss,int sU,const FermionField &in, FermionField &out) \
{ \
template<> typedef IMPL Impl; \
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, typedef typename Simd::scalar_type S; \
int ss,int sU,const FermionField &in, FermionField &out) typedef typename Simd::vector_type V; \
{ \
typedef GparityWilsonImplD Impl; HAND_DECLARATIONS(ignore); \
typedef typename Simd::scalar_type S; \
typedef typename Simd::vector_type V; StencilEntry *SE; \
int offset,local,perm, ptype; \
HAND_DECLARATIONS(ignore); HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
StencilEntry *SE; } \
int offset,local,perm, ptype; \
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); template<> void \
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
} int ss,int sU,const FermionField &in, FermionField &out) \
{ \
template<> void typedef IMPL Impl; \
WilsonKernels<GparityWilsonImplD>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, typedef typename Simd::scalar_type S; \
int ss,int sU,const FermionField &in, FermionField &out) typedef typename Simd::vector_type V; \
{ \
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... HAND_DECLARATIONS(ignore); \
typedef GparityWilsonImplD Impl; \
typedef typename Simd::scalar_type S; int offset,local,perm, ptype; \
typedef typename Simd::vector_type V; StencilEntry *SE; \
int nmu=0; \
HAND_DECLARATIONS(ignore); HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
nmu = 0; \
int offset,local,perm, ptype; HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
StencilEntry *SE; } \
int nmu=0; template<> \
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
nmu = 0; int ss,int sU,const FermionField &in, FermionField &out) \
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); { \
} typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
template<> typedef typename Simd::vector_type V; \
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) HAND_DECLARATIONS(ignore); \
{ \
typedef GparityWilsonImplD Impl; StencilEntry *SE; \
typedef typename Simd::scalar_type S; int offset,local,perm, ptype; \
typedef typename Simd::vector_type V; int nmu=0; \
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
HAND_DECLARATIONS(ignore); nmu = 0; \
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
nmu = 0;
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
} }
HAND_SPECIALISE_GPARITY(GparityWilsonImplF);
HAND_SPECIALISE_GPARITY(GparityWilsonImplD);
HAND_SPECIALISE_GPARITY(GparityWilsonImplFH);
HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);