|
|
|
@ -77,23 +77,23 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
|
// REGISTER is defined with an empty replacement list, so it vanishes wherever it is used.
#define REGISTER
|
|
|
|
|
|
|
|
|
|
#ifdef GRID_SIMT
|
|
|
|
|
// LOAD_CHIMU (GRID_SIMT build): binds `ref` to in[offset] and fills the twelve
// Chimu_<s><c> variables (s = 0..3, c = 0..2) from ref()(s)(c) through
// coalescedReadPermute<...>(..., perm, lane). The macro argument is used only as
// the template argument of coalescedReadPermute. `in`, `offset`, `perm`, `lane`
// and the Chimu_* variables are not declared here and must already be in scope
// at the expansion site.
// NOTE(review): two #define lines (parameter spelled `ptype` and `Ptype`) and two
// copies of the body appear in this span; this looks like the before/after sides
// of a change -- confirm which one is live.
#define LOAD_CHIMU(ptype) \
|
|
|
|
|
#define LOAD_CHIMU(Ptype) \
|
|
|
|
|
{const SiteSpinor & ref (in[offset]); \
|
|
|
|
|
Chimu_00=coalescedReadPermute<ptype>(ref()(0)(0),perm,lane); \
|
|
|
|
|
Chimu_01=coalescedReadPermute<ptype>(ref()(0)(1),perm,lane); \
|
|
|
|
|
Chimu_02=coalescedReadPermute<ptype>(ref()(0)(2),perm,lane); \
|
|
|
|
|
Chimu_10=coalescedReadPermute<ptype>(ref()(1)(0),perm,lane); \
|
|
|
|
|
Chimu_11=coalescedReadPermute<ptype>(ref()(1)(1),perm,lane); \
|
|
|
|
|
Chimu_12=coalescedReadPermute<ptype>(ref()(1)(2),perm,lane); \
|
|
|
|
|
Chimu_20=coalescedReadPermute<ptype>(ref()(2)(0),perm,lane); \
|
|
|
|
|
Chimu_21=coalescedReadPermute<ptype>(ref()(2)(1),perm,lane); \
|
|
|
|
|
Chimu_22=coalescedReadPermute<ptype>(ref()(2)(2),perm,lane); \
|
|
|
|
|
Chimu_30=coalescedReadPermute<ptype>(ref()(3)(0),perm,lane); \
|
|
|
|
|
Chimu_31=coalescedReadPermute<ptype>(ref()(3)(1),perm,lane); \
|
|
|
|
|
Chimu_32=coalescedReadPermute<ptype>(ref()(3)(2),perm,lane); }
|
|
|
|
|
Chimu_00=coalescedReadPermute<Ptype>(ref()(0)(0),perm,lane); \
|
|
|
|
|
Chimu_01=coalescedReadPermute<Ptype>(ref()(0)(1),perm,lane); \
|
|
|
|
|
Chimu_02=coalescedReadPermute<Ptype>(ref()(0)(2),perm,lane); \
|
|
|
|
|
Chimu_10=coalescedReadPermute<Ptype>(ref()(1)(0),perm,lane); \
|
|
|
|
|
Chimu_11=coalescedReadPermute<Ptype>(ref()(1)(1),perm,lane); \
|
|
|
|
|
Chimu_12=coalescedReadPermute<Ptype>(ref()(1)(2),perm,lane); \
|
|
|
|
|
Chimu_20=coalescedReadPermute<Ptype>(ref()(2)(0),perm,lane); \
|
|
|
|
|
Chimu_21=coalescedReadPermute<Ptype>(ref()(2)(1),perm,lane); \
|
|
|
|
|
Chimu_22=coalescedReadPermute<Ptype>(ref()(2)(2),perm,lane); \
|
|
|
|
|
Chimu_30=coalescedReadPermute<Ptype>(ref()(3)(0),perm,lane); \
|
|
|
|
|
Chimu_31=coalescedReadPermute<Ptype>(ref()(3)(1),perm,lane); \
|
|
|
|
|
Chimu_32=coalescedReadPermute<Ptype>(ref()(3)(2),perm,lane); }
|
|
|
|
|
// PERMUTE_DIR (GRID_SIMT build): expands to an empty statement; `dir` is ignored.
// NOTE(review): presumably no separate permute step is needed here because the
// SIMT LOAD_CHIMU already passes `perm` to coalescedReadPermute -- confirm.
#define PERMUTE_DIR(dir) ;
|
|
|
|
|
#else
|
|
|
|
|
#define LOAD_CHIMU(ptype) \
|
|
|
|
|
#define LOAD_CHIMU(Ptype) \
|
|
|
|
|
{const SiteSpinor & ref (in[offset]); \
|
|
|
|
|
Chimu_00=ref()(0)(0);\
|
|
|
|
|
Chimu_01=ref()(0)(1);\
|
|
|
|
@ -109,12 +109,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
|
Chimu_32=ref()(3)(2);}
|
|
|
|
|
|
|
|
|
|
// PERMUTE_DIR (non-GRID_SIMT build): pastes `dir` onto `permute` to form the
// function name (permute##dir) and calls it with the same variable as both
// arguments for each of the six values Chi_00, Chi_01, Chi_02, Chi_10, Chi_11
// and Chi_12. The Chi_* variables must be in scope at the expansion site.
// NOTE(review): the six-call body appears twice in this span, differing only in
// spacing before the line-continuation backslash; this looks like the
// before/after sides of a whitespace change -- confirm which one is live.
#define PERMUTE_DIR(dir) \
|
|
|
|
|
permute##dir(Chi_00,Chi_00); \
|
|
|
|
|
permute##dir(Chi_01,Chi_01);\
|
|
|
|
|
permute##dir(Chi_02,Chi_02);\
|
|
|
|
|
permute##dir(Chi_10,Chi_10); \
|
|
|
|
|
permute##dir(Chi_11,Chi_11);\
|
|
|
|
|
permute##dir(Chi_12,Chi_12);
|
|
|
|
|
permute##dir(Chi_00,Chi_00); \
|
|
|
|
|
permute##dir(Chi_01,Chi_01); \
|
|
|
|
|
permute##dir(Chi_02,Chi_02); \
|
|
|
|
|
permute##dir(Chi_10,Chi_10); \
|
|
|
|
|
permute##dir(Chi_11,Chi_11); \
|
|
|
|
|
permute##dir(Chi_12,Chi_12);
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
@ -371,88 +371,91 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
|
|
|
result_32-= UChi_12;
|
|
|
|
|
|
|
|
|
|
// HAND_STENCIL_LEGB(PROJ,PERM,DIR,RECON): one stencil leg for direction DIR.
//   - Fetches the stencil entry with st.GetEntry(ptype,DIR,ss) into SE and reads
//     its _offset, _is_local and _permute fields.
//   - If the entry is local: LOAD_CHIMU(PERM), then the PROJ statement, then
//     PERMUTE_DIR(PERM) when _permute is non-zero. Otherwise: LOAD_CHI.
//   - Then acceleratorSynchronise(), MULT_2SPIN(DIR) and the RECON statement.
// PROJ and RECON are pasted in as statements; `st`, `ss` and `SE` come from the
// expansion site.
// NOTE(review): two bodies appear in this span. The first assigns to
// offset/local/perm (and passes ptype), which must be declared by the caller;
// the second wraps everything in braces and declares `int ptype` plus
// `auto offset/local/perm` itself. This looks like the before/after sides of a
// change -- confirm which one is live.
#define HAND_STENCIL_LEGB(PROJ,PERM,DIR,RECON) \
|
|
|
|
|
SE=st.GetEntry(ptype,DIR,ss); \
|
|
|
|
|
offset = SE->_offset; \
|
|
|
|
|
local = SE->_is_local; \
|
|
|
|
|
perm = SE->_permute; \
|
|
|
|
|
if ( local ) { \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
if ( perm) { \
|
|
|
|
|
PERMUTE_DIR(PERM); \
|
|
|
|
|
} \
|
|
|
|
|
} else { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON;
|
|
|
|
|
{int ptype; \
|
|
|
|
|
SE=st.GetEntry(ptype,DIR,ss); \
|
|
|
|
|
auto offset = SE->_offset; \
|
|
|
|
|
auto local = SE->_is_local; \
|
|
|
|
|
auto perm = SE->_permute; \
|
|
|
|
|
if ( local ) { \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
if ( perm) { \
|
|
|
|
|
PERMUTE_DIR(PERM); \
|
|
|
|
|
} \
|
|
|
|
|
} else { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; }
|
|
|
|
|
|
|
|
|
|
// HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON): one stencil leg for direction DIR,
// reading the stencil tables directly instead of calling st.GetEntry.
//   - SE points at st_p[DIR+8*ss]; ptype is taken from st_perm[DIR].
//   - offset/local/perm are copied from SE->_offset, SE->_is_local, SE->_permute.
//   - If local: LOAD_CHIMU(PERM), the PROJ statement, then PERMUTE_DIR(PERM) when
//     perm is non-zero. Otherwise: LOAD_CHI.
//   - Then acceleratorSynchronise(), MULT_2SPIN(DIR) and the RECON statement.
// This form only assigns: SE, ptype, offset, local and perm (as well as st_p,
// st_perm and ss) must be declared at the expansion site. It is not wrapped in
// braces, so it expands to several statements.
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
|
|
|
|
SE=&st_p[DIR+8*ss]; \
|
|
|
|
|
ptype=st_perm[DIR]; \
|
|
|
|
|
offset = SE->_offset; \
|
|
|
|
|
local = SE->_is_local; \
|
|
|
|
|
perm = SE->_permute; \
|
|
|
|
|
if ( local ) { \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
if ( perm) { \
|
|
|
|
|
PERMUTE_DIR(PERM); \
|
|
|
|
|
} \
|
|
|
|
|
} else { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON;
|
|
|
|
|
// HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON): one stencil leg for direction DIR,
// reading the stencil tables directly instead of calling st.GetEntry.
//   - SE points at st_p[DIR+8*ss]; ptype is taken from st_perm[DIR].
//   - offset/local/perm are copied from SE->_offset, SE->_is_local, SE->_permute.
//   - If local: LOAD_CHIMU(PERM), the PROJ statement, then PERMUTE_DIR(PERM) when
//     perm is non-zero. Otherwise: LOAD_CHI.
//   - Then acceleratorSynchronise(), MULT_2SPIN(DIR) and the RECON statement.
// The whole expansion is one braced block, and ptype/offset/local/perm are
// declared inside it with `auto`, so they do not need to exist (and are not
// visible) outside the expansion. SE, st_p, st_perm and ss still come from the
// expansion site.
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
|
|
|
|
{ SE=&st_p[DIR+8*ss]; \
|
|
|
|
|
auto ptype=st_perm[DIR]; \
|
|
|
|
|
auto offset = SE->_offset; \
|
|
|
|
|
auto local = SE->_is_local; \
|
|
|
|
|
auto perm = SE->_permute; \
|
|
|
|
|
if ( local ) { \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
if ( perm) { \
|
|
|
|
|
PERMUTE_DIR(PERM); \
|
|
|
|
|
} \
|
|
|
|
|
} else { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; }
|
|
|
|
|
|
|
|
|
|
// HAND_STENCIL_LEGA(PROJ,PERM,DIR,RECON): stencil leg for direction DIR with no
// local/non-local branch.
//   - SE points at st_p[DIR+8*ss]; ptype is taken from st_perm[DIR] (the
//     st.GetEntry call is left commented out inside the macro).
//   - offset and perm are copied from SE->_offset and SE->_permute.
//   - Always runs LOAD_CHIMU(PERM), the PROJ statement, MULT_2SPIN(DIR) and the
//     RECON statement; SE->_is_local is not read and neither PERMUTE_DIR nor
//     acceleratorSynchronise() is invoked here.
// NOTE(review): two bodies appear in this span. The first assigns to
// ptype/offset/perm declared by the caller; the second is a braced block that
// declares them with `auto`. This looks like the before/after sides of a change
// -- confirm which one is live.
#define HAND_STENCIL_LEGA(PROJ,PERM,DIR,RECON) \
|
|
|
|
|
SE=&st_p[DIR+8*ss]; \
|
|
|
|
|
ptype=st_perm[DIR]; \
|
|
|
|
|
/*SE=st.GetEntry(ptype,DIR,ss);*/ \
|
|
|
|
|
offset = SE->_offset; \
|
|
|
|
|
perm = SE->_permute; \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON;
|
|
|
|
|
{ SE=&st_p[DIR+8*ss]; \
|
|
|
|
|
auto ptype=st_perm[DIR]; \
|
|
|
|
|
/*SE=st.GetEntry(ptype,DIR,ss);*/ \
|
|
|
|
|
auto offset = SE->_offset; \
|
|
|
|
|
auto perm = SE->_permute; \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; }
|
|
|
|
|
|
|
|
|
|
// HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON): stencil leg for direction DIR that
// only contributes when the entry is local or st.same_node[DIR] is set.
//   - Fetches the stencil entry with st.GetEntry(ptype,DIR,ss) into SE and reads
//     its _offset, _is_local and _permute fields.
//   - If local: LOAD_CHIMU(PERM), the PROJ statement, then PERMUTE_DIR(PERM) when
//     perm is non-zero. Else, if st.same_node[DIR]: LOAD_CHI. Otherwise nothing
//     is loaded.
//   - acceleratorSynchronise() is called; MULT_2SPIN(DIR) and the RECON statement
//     run only when (local || st.same_node[DIR]); a second
//     acceleratorSynchronise() ends the leg.
// NOTE(review): two bodies appear in this span. The first assigns to
// offset/local/perm (and passes ptype) declared by the caller; the second is a
// braced block that declares `int ptype` and `auto offset/local/perm` itself.
// This looks like the before/after sides of a change -- confirm which one is live.
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
|
|
|
|
|
SE=st.GetEntry(ptype,DIR,ss); \
|
|
|
|
|
offset = SE->_offset; \
|
|
|
|
|
local = SE->_is_local; \
|
|
|
|
|
perm = SE->_permute; \
|
|
|
|
|
if ( local ) { \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
if ( perm) { \
|
|
|
|
|
PERMUTE_DIR(PERM); \
|
|
|
|
|
} \
|
|
|
|
|
} else if ( st.same_node[DIR] ) { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); \
|
|
|
|
|
if (local || st.same_node[DIR] ) { \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise();
|
|
|
|
|
{ int ptype; \
|
|
|
|
|
SE=st.GetEntry(ptype,DIR,ss); \
|
|
|
|
|
auto offset = SE->_offset; \
|
|
|
|
|
auto local = SE->_is_local; \
|
|
|
|
|
auto perm = SE->_permute; \
|
|
|
|
|
if ( local ) { \
|
|
|
|
|
LOAD_CHIMU(PERM); \
|
|
|
|
|
PROJ; \
|
|
|
|
|
if ( perm) { \
|
|
|
|
|
PERMUTE_DIR(PERM); \
|
|
|
|
|
} \
|
|
|
|
|
} else if ( st.same_node[DIR] ) { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); \
|
|
|
|
|
if (local || st.same_node[DIR] ) { \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); }
|
|
|
|
|
|
|
|
|
|
// HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON): stencil leg for direction DIR that
// only contributes when the entry is neither local nor on the same node.
//   - Fetches the stencil entry with st.GetEntry(ptype,DIR,ss) into SE and reads
//     its _offset.
//   - When (!SE->_is_local && !st.same_node[DIR]): LOAD_CHI, MULT_2SPIN(DIR), the
//     RECON statement, and nmu is incremented.
//   - acceleratorSynchronise() is called in every case.
// PROJ and PERM are accepted but not used in the expansion; `nmu`, `st`, `ss`
// and `SE` come from the expansion site.
// NOTE(review): two bodies appear in this span. The first assigns to an `offset`
// (and passes a `ptype`) declared by the caller; the second is a braced block
// that declares `int ptype` and `auto offset` itself. This looks like the
// before/after sides of a change -- confirm which one is live.
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
|
|
|
|
|
SE=st.GetEntry(ptype,DIR,ss); \
|
|
|
|
|
offset = SE->_offset; \
|
|
|
|
|
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; \
|
|
|
|
|
nmu++; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise();
|
|
|
|
|
{ int ptype; \
|
|
|
|
|
SE=st.GetEntry(ptype,DIR,ss); \
|
|
|
|
|
auto offset = SE->_offset; \
|
|
|
|
|
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
|
|
|
|
LOAD_CHI; \
|
|
|
|
|
MULT_2SPIN(DIR); \
|
|
|
|
|
RECON; \
|
|
|
|
|
nmu++; \
|
|
|
|
|
} \
|
|
|
|
|
acceleratorSynchronise(); }
|
|
|
|
|
|
|
|
|
|
#define HAND_RESULT(ss) \
|
|
|
|
|
{ \
|
|
|
|
|
SiteSpinor & ref (out[ss]); \
|
|
|
|
|
#define HAND_RESULT(ss) \
|
|
|
|
|
{ \
|
|
|
|
|
SiteSpinor & ref (out[ss]); \
|
|
|
|
|
coalescedWrite(ref()(0)(0),result_00,lane); \
|
|
|
|
|
coalescedWrite(ref()(0)(1),result_01,lane); \
|
|
|
|
|
coalescedWrite(ref()(0)(2),result_02,lane); \
|
|
|
|
@ -563,7 +566,6 @@ WilsonKernels<Impl>::HandDhopSiteSycl(StencilVector st_perm,StencilEntry *st_p,
|
|
|
|
|
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
int offset,local,perm, ptype;
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
|
|
|
|
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
|
|
|
@ -593,9 +595,7 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
|
|
|
|
|
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
int offset,local,perm, ptype;
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
|
|
|
|
|
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
|
|
|
|
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
|
|
|
|
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
|
|
|
@ -623,8 +623,6 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
int offset,local,perm, ptype;
|
|
|
|
|
|
|
|
|
|
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
|
|
|
|
|
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
|
|
|
|
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
|
|
|
@ -640,8 +638,8 @@ template<class Impl> accelerator_inline void
|
|
|
|
|
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
|
|
|
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
|
|
|
|
{
|
|
|
|
|
auto st_p = st._entries_p;
|
|
|
|
|
auto st_perm = st._permute_type;
|
|
|
|
|
// auto st_p = st._entries_p;
|
|
|
|
|
// auto st_perm = st._permute_type;
|
|
|
|
|
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
typedef typename Simd::scalar_type S;
|
|
|
|
|
typedef typename Simd::vector_type V;
|
|
|
|
@ -652,7 +650,6 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
|
|
|
|
|
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
int offset,local,perm, ptype;
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
ZERO_RESULT;
|
|
|
|
|
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
|
|
|
@ -670,8 +667,8 @@ template<class Impl> accelerator_inline
|
|
|
|
|
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
|
|
|
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
|
|
|
|
{
|
|
|
|
|
auto st_p = st._entries_p;
|
|
|
|
|
auto st_perm = st._permute_type;
|
|
|
|
|
// auto st_p = st._entries_p;
|
|
|
|
|
// auto st_perm = st._permute_type;
|
|
|
|
|
typedef typename Simd::scalar_type S;
|
|
|
|
|
typedef typename Simd::vector_type V;
|
|
|
|
|
typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt;
|
|
|
|
@ -682,7 +679,6 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
int offset,local,perm, ptype;
|
|
|
|
|
ZERO_RESULT;
|
|
|
|
|
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
|
|
|
|
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
|
|
|
@ -699,8 +695,8 @@ template<class Impl> accelerator_inline void
|
|
|
|
|
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
|
|
|
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
|
|
|
|
{
|
|
|
|
|
auto st_p = st._entries_p;
|
|
|
|
|
auto st_perm = st._permute_type;
|
|
|
|
|
// auto st_p = st._entries_p;
|
|
|
|
|
// auto st_perm = st._permute_type;
|
|
|
|
|
// T==0, Z==1, Y==2, X==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
typedef typename Simd::scalar_type S;
|
|
|
|
|
typedef typename Simd::vector_type V;
|
|
|
|
@ -711,7 +707,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
|
|
|
|
|
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
int offset, ptype;
|
|
|
|
|
// int offset, ptype;
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
int nmu=0;
|
|
|
|
|
ZERO_RESULT;
|
|
|
|
@ -730,8 +726,8 @@ template<class Impl> accelerator_inline
|
|
|
|
|
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
|
|
|
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
|
|
|
|
{
|
|
|
|
|
auto st_p = st._entries_p;
|
|
|
|
|
auto st_perm = st._permute_type;
|
|
|
|
|
// auto st_p = st._entries_p;
|
|
|
|
|
// auto st_perm = st._permute_type;
|
|
|
|
|
typedef typename Simd::scalar_type S;
|
|
|
|
|
typedef typename Simd::vector_type V;
|
|
|
|
|
typedef decltype( coalescedRead( in[0]()(0)(0) )) Simt;
|
|
|
|
@ -742,7 +738,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldVi
|
|
|
|
|
HAND_DECLARATIONS(Simt);
|
|
|
|
|
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
int offset, ptype;
|
|
|
|
|
// int offset, ptype;
|
|
|
|
|
int nmu=0;
|
|
|
|
|
ZERO_RESULT;
|
|
|
|
|
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
|
|
|
|