mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-26 05:35:55 +01:00
Implemented first, unoptimized version of hand-unrolled G-parity kernels
Improved Test_gparity
This commit is contained in:
parent
383ca7d392
commit
ab50145001
@ -425,6 +425,22 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
namespace GparityWilsonImpl_helper{
  // Compile-time selector: given two objects of types A and B, getAB<Get,A,B>::ref
  // returns a reference to whichever of them has type Get.
  // Only Get==A and Get==B are supported; any other Get leaves the primary
  // template undefined and fails to compile.
  template<typename Get, typename A,typename B>
  struct getAB;

  // Get is the first type: hand back the first object.
  template<typename A,typename B>
  struct getAB<A,A,B>{
    static inline A & ref(A &a, B &){ return a; }
  };

  // Get is the second type: hand back the second object.
  template<typename A,typename B>
  struct getAB<B,A,B>{
    static inline B & ref(A &, B &b){ return b; }
  };

  // Both candidate types are identical. Without this case getAB<X,X,X> matches
  // both specialisations above and is ambiguous; prefer the first object.
  template<typename A>
  struct getAB<A,A,A>{
    static inline A & ref(A &a, A &){ return a; }
  };
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <class S, int Nrepresentation, class Options=CoeffReal>
|
template <class S, int Nrepresentation, class Options=CoeffReal>
|
||||||
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||||
public:
|
public:
|
||||||
@ -462,7 +478,10 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
|||||||
|
|
||||||
ImplParams Params;
|
ImplParams Params;
|
||||||
|
|
||||||
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
std::vector<SiteSpinor, alignedAllocator<SiteSpinor> > tmp_full;
|
||||||
|
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > tmp_half;
|
||||||
|
|
||||||
|
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p), tmp_full(GridThread::GetThreads()), tmp_half(GridThread::GetThreads()){};
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||||
|
|
||||||
@ -538,6 +557,66 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class ref>
|
||||||
|
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||||
|
reg = memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename SiteSpinorType>
|
||||||
|
void GparityTwistPermute(SiteSpinorType &into, const SiteSpinorType &from, const int direction, const int distance, const int perm, GridBase* grid){
|
||||||
|
typedef typename SiteSpinorType::scalar_object sobj;
|
||||||
|
sobj stmp;
|
||||||
|
std::vector<sobj> vals(grid->Nsimd());
|
||||||
|
extract(from,vals);
|
||||||
|
std::vector<int> icoor;
|
||||||
|
for(int s=0;s<grid->Nsimd();s++){
|
||||||
|
grid->iCoorFromIindex(icoor,s);
|
||||||
|
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||||
|
|
||||||
|
int permute_lane;
|
||||||
|
if ( distance == 1) {
|
||||||
|
permute_lane = icoor[direction]?1:0;
|
||||||
|
} else {
|
||||||
|
permute_lane = icoor[direction]?0:1;
|
||||||
|
}
|
||||||
|
if(perm) permute_lane = !permute_lane;
|
||||||
|
|
||||||
|
if ( permute_lane ) {
|
||||||
|
stmp(0) = vals[s](1);
|
||||||
|
stmp(1) = vals[s](0);
|
||||||
|
vals[s] = stmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
merge(into,vals);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename SiteSpinorType>
|
||||||
|
const SiteSpinorType & GparityGetChi(int &g, SiteSpinorType const* in, const int dir, const int f, StencilEntry *SE, StencilImpl &st){
|
||||||
|
const int mmu = dir % 4;
|
||||||
|
const int direction = st._directions[dir];
|
||||||
|
const int sl = st._grid->_simd_layout[direction];
|
||||||
|
const int perm = SE->_permute;
|
||||||
|
g = f;
|
||||||
|
|
||||||
|
if(SE->_around_the_world && Params.twists[mmu]){
|
||||||
|
if(sl == 1){ //not SIMD vectorized in G-parity direction so just change the flavor index accessed to implement the twist
|
||||||
|
g = (f+1) % 2;
|
||||||
|
return in[SE->_offset];
|
||||||
|
}else{ //SIMD vectorized in Gparity direction
|
||||||
|
const int me = omp_get_thread_num();
|
||||||
|
const int distance = st._distances[dir];
|
||||||
|
assert(distance == -1 || distance == 1);
|
||||||
|
SiteSpinorType &tmp = GparityWilsonImpl_helper::getAB<SiteSpinorType, SiteSpinor, SiteHalfSpinor>::ref(tmp_full[me], tmp_half[me]);
|
||||||
|
GparityTwistPermute<SiteSpinorType>(tmp, in[SE->_offset], direction, distance, perm, st._grid);
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
}else return in[SE->_offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
{
|
{
|
||||||
conformable(Uds._grid,GaugeGrid);
|
conformable(Uds._grid,GaugeGrid);
|
||||||
|
@ -30,33 +30,43 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#define REGISTER
|
#define REGISTER
|
||||||
|
|
||||||
#define LOAD_CHIMU \
|
#define LOAD_CHIMU_BODY(F) \
|
||||||
{const SiteSpinor & ref (in._odata[offset]); \
|
Chimu_00=ref(F)(0)(0); \
|
||||||
Chimu_00=ref()(0)(0);\
|
Chimu_01=ref(F)(0)(1); \
|
||||||
Chimu_01=ref()(0)(1);\
|
Chimu_02=ref(F)(0)(2); \
|
||||||
Chimu_02=ref()(0)(2);\
|
Chimu_10=ref(F)(1)(0); \
|
||||||
Chimu_10=ref()(1)(0);\
|
Chimu_11=ref(F)(1)(1); \
|
||||||
Chimu_11=ref()(1)(1);\
|
Chimu_12=ref(F)(1)(2); \
|
||||||
Chimu_12=ref()(1)(2);\
|
Chimu_20=ref(F)(2)(0); \
|
||||||
Chimu_20=ref()(2)(0);\
|
Chimu_21=ref(F)(2)(1); \
|
||||||
Chimu_21=ref()(2)(1);\
|
Chimu_22=ref(F)(2)(2); \
|
||||||
Chimu_22=ref()(2)(2);\
|
Chimu_30=ref(F)(3)(0); \
|
||||||
Chimu_30=ref()(3)(0);\
|
Chimu_31=ref(F)(3)(1); \
|
||||||
Chimu_31=ref()(3)(1);\
|
Chimu_32=ref(F)(3)(2)
|
||||||
Chimu_32=ref()(3)(2);}
|
|
||||||
|
// Load the twelve Chimu_* values from the full spinor at in._odata[offset],
// reading component index F. DIR is accepted only so that this macro and
// LOAD_CHIMU_GPARITY can be passed interchangeably as LOAD_CHIMU_IMPL.
#define LOAD_CHIMU(DIR,F)						\
  { const SiteSpinor &ref = in._odata[offset]; LOAD_CHIMU_BODY(F); }

// G-parity variant: GparityGetChi picks the spinor to read and the component
// index g to use for leg DIR, given the requested index F.
#define LOAD_CHIMU_GPARITY(DIR,F)					\
  { int g; const SiteSpinor &ref = GparityGetChi<SiteSpinor>(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); }
|
||||||
|
|
||||||
|
// Copy the six components of a half spinor named `ref` (which must be in scope at
// the expansion site) into Chi_00..Chi_12, reading component index F.
// No trailing semicolon: the expansion site supplies it.
#define LOAD_CHI_BODY(F)			\
  Chi_00 = ref(F)(0)(0);			\
  Chi_01 = ref(F)(0)(1);			\
  Chi_02 = ref(F)(0)(2);			\
  Chi_10 = ref(F)(1)(0);			\
  Chi_11 = ref(F)(1)(1);			\
  Chi_12 = ref(F)(1)(2)

// Load Chi_* from buf[offset]. DIR is accepted only so that this macro and
// LOAD_CHI_GPARITY can be passed interchangeably as LOAD_CHI_IMPL.
#define LOAD_CHI(DIR,F)						\
  { const SiteHalfSpinor &ref = buf[offset]; LOAD_CHI_BODY(F); }

// G-parity variant: GparityGetChi picks the half spinor to read and the component
// index g to use for leg DIR, given the requested index F.
#define LOAD_CHI_GPARITY(DIR,F)					\
  { int g; const SiteHalfSpinor &ref = GparityGetChi<SiteHalfSpinor>(g,buf,DIR,F,SE,st); LOAD_CHI_BODY(g); }
|
||||||
|
|
||||||
#define LOAD_CHI\
|
|
||||||
{const SiteHalfSpinor &ref(buf[offset]); \
|
|
||||||
Chi_00 = ref()(0)(0);\
|
|
||||||
Chi_01 = ref()(0)(1);\
|
|
||||||
Chi_02 = ref()(0)(2);\
|
|
||||||
Chi_10 = ref()(1)(0);\
|
|
||||||
Chi_11 = ref()(1)(1);\
|
|
||||||
Chi_12 = ref()(1)(2);}
|
|
||||||
|
|
||||||
// To splat or not to splat depends on the implementation
|
// To splat or not to splat depends on the implementation
|
||||||
#define MULT_2SPIN(A)\
|
#define MULT_2SPIN_BODY \
|
||||||
{auto & ref(U._odata[sU](A)); \
|
|
||||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||||
@ -83,7 +93,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
UChi_01+= U_10*Chi_02; \
|
UChi_01+= U_10*Chi_02; \
|
||||||
UChi_11+= U_10*Chi_12; \
|
UChi_11+= U_10*Chi_12; \
|
||||||
UChi_02+= U_20*Chi_02; \
|
UChi_02+= U_20*Chi_02; \
|
||||||
UChi_12+= U_20*Chi_12;}
|
UChi_12+= U_20*Chi_12
|
||||||
|
|
||||||
|
|
||||||
|
// Run MULT_2SPIN_BODY using the link U._odata[sU](A). F is accepted only so that
// this macro and MULT_2SPIN_GPARITY can be passed interchangeably as MULT_2SPIN_IMPL.
#define MULT_2SPIN(A,F)					\
  { auto &ref = U._odata[sU](A); MULT_2SPIN_BODY; }

// G-parity variant: the link is first indexed by F, then by A.
#define MULT_2SPIN_GPARITY(A,F)				\
  { auto &ref = U._odata[sU](F)(A); MULT_2SPIN_BODY; }
|
||||||
|
|
||||||
|
|
||||||
#define PERMUTE_DIR(dir) \
|
#define PERMUTE_DIR(dir) \
|
||||||
@ -307,84 +324,85 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
result_31-= UChi_11; \
|
result_31-= UChi_11; \
|
||||||
result_32-= UChi_12;
|
result_32-= UChi_12;
|
||||||
|
|
||||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
SE=st.GetEntry(ptype,DIR,ss); \
|
SE=st.GetEntry(ptype,DIR,ss); \
|
||||||
offset = SE->_offset; \
|
offset = SE->_offset; \
|
||||||
local = SE->_is_local; \
|
local = SE->_is_local; \
|
||||||
perm = SE->_permute; \
|
perm = SE->_permute; \
|
||||||
if ( local ) { \
|
if ( local ) { \
|
||||||
LOAD_CHIMU; \
|
LOAD_CHIMU_IMPL(DIR,F); \
|
||||||
PROJ; \
|
PROJ; \
|
||||||
if ( perm) { \
|
if ( perm) { \
|
||||||
PERMUTE_DIR(PERM); \
|
PERMUTE_DIR(PERM); \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
LOAD_CHI; \
|
LOAD_CHI_IMPL(DIR,F); \
|
||||||
} \
|
} \
|
||||||
MULT_2SPIN(DIR); \
|
MULT_2SPIN_IMPL(DIR,F); \
|
||||||
RECON;
|
RECON;
|
||||||
|
|
||||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
|
|
||||||
|
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
SE=st.GetEntry(ptype,DIR,ss); \
|
SE=st.GetEntry(ptype,DIR,ss); \
|
||||||
offset = SE->_offset; \
|
offset = SE->_offset; \
|
||||||
local = SE->_is_local; \
|
local = SE->_is_local; \
|
||||||
perm = SE->_permute; \
|
perm = SE->_permute; \
|
||||||
if ( local ) { \
|
if ( local ) { \
|
||||||
LOAD_CHIMU; \
|
LOAD_CHIMU_IMPL(DIR,F); \
|
||||||
PROJ; \
|
PROJ; \
|
||||||
if ( perm) { \
|
if ( perm) { \
|
||||||
PERMUTE_DIR(PERM); \
|
PERMUTE_DIR(PERM); \
|
||||||
} \
|
} \
|
||||||
} else if ( st.same_node[DIR] ) { \
|
} else if ( st.same_node[DIR] ) { \
|
||||||
LOAD_CHI; \
|
LOAD_CHI_IMPL(DIR,F); \
|
||||||
} \
|
} \
|
||||||
if (local || st.same_node[DIR] ) { \
|
if (local || st.same_node[DIR] ) { \
|
||||||
MULT_2SPIN(DIR); \
|
MULT_2SPIN_IMPL(DIR,F); \
|
||||||
RECON; \
|
RECON; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
|
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
SE=st.GetEntry(ptype,DIR,ss); \
|
SE=st.GetEntry(ptype,DIR,ss); \
|
||||||
offset = SE->_offset; \
|
offset = SE->_offset; \
|
||||||
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
||||||
LOAD_CHI; \
|
LOAD_CHI_IMPL(DIR,F); \
|
||||||
MULT_2SPIN(DIR); \
|
MULT_2SPIN_IMPL(DIR,F); \
|
||||||
RECON; \
|
RECON; \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HAND_RESULT(ss) \
|
#define HAND_RESULT(ss,F) \
|
||||||
{ \
|
{ \
|
||||||
SiteSpinor & ref (out._odata[ss]); \
|
SiteSpinor & ref (out._odata[ss]); \
|
||||||
vstream(ref()(0)(0),result_00); \
|
vstream(ref(F)(0)(0),result_00); \
|
||||||
vstream(ref()(0)(1),result_01); \
|
vstream(ref(F)(0)(1),result_01); \
|
||||||
vstream(ref()(0)(2),result_02); \
|
vstream(ref(F)(0)(2),result_02); \
|
||||||
vstream(ref()(1)(0),result_10); \
|
vstream(ref(F)(1)(0),result_10); \
|
||||||
vstream(ref()(1)(1),result_11); \
|
vstream(ref(F)(1)(1),result_11); \
|
||||||
vstream(ref()(1)(2),result_12); \
|
vstream(ref(F)(1)(2),result_12); \
|
||||||
vstream(ref()(2)(0),result_20); \
|
vstream(ref(F)(2)(0),result_20); \
|
||||||
vstream(ref()(2)(1),result_21); \
|
vstream(ref(F)(2)(1),result_21); \
|
||||||
vstream(ref()(2)(2),result_22); \
|
vstream(ref(F)(2)(2),result_22); \
|
||||||
vstream(ref()(3)(0),result_30); \
|
vstream(ref(F)(3)(0),result_30); \
|
||||||
vstream(ref()(3)(1),result_31); \
|
vstream(ref(F)(3)(1),result_31); \
|
||||||
vstream(ref()(3)(2),result_32); \
|
vstream(ref(F)(3)(2),result_32); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HAND_RESULT_EXT(ss) \
|
#define HAND_RESULT_EXT(ss,F) \
|
||||||
if (nmu){ \
|
if (nmu){ \
|
||||||
SiteSpinor & ref (out._odata[ss]); \
|
SiteSpinor & ref (out._odata[ss]); \
|
||||||
ref()(0)(0)+=result_00; \
|
ref(F)(0)(0)+=result_00; \
|
||||||
ref()(0)(1)+=result_01; \
|
ref(F)(0)(1)+=result_01; \
|
||||||
ref()(0)(2)+=result_02; \
|
ref(F)(0)(2)+=result_02; \
|
||||||
ref()(1)(0)+=result_10; \
|
ref(F)(1)(0)+=result_10; \
|
||||||
ref()(1)(1)+=result_11; \
|
ref(F)(1)(1)+=result_11; \
|
||||||
ref()(1)(2)+=result_12; \
|
ref(F)(1)(2)+=result_12; \
|
||||||
ref()(2)(0)+=result_20; \
|
ref(F)(2)(0)+=result_20; \
|
||||||
ref()(2)(1)+=result_21; \
|
ref(F)(2)(1)+=result_21; \
|
||||||
ref()(2)(2)+=result_22; \
|
ref(F)(2)(2)+=result_22; \
|
||||||
ref()(3)(0)+=result_30; \
|
ref(F)(3)(0)+=result_30; \
|
||||||
ref()(3)(1)+=result_31; \
|
ref(F)(3)(1)+=result_31; \
|
||||||
ref()(3)(2)+=result_32; \
|
ref(F)(3)(2)+=result_32; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -463,15 +481,18 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
|
|||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
|
|
||||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_RESULT(ss);
|
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_RESULT(ss,F)
|
||||||
|
|
||||||
|
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -486,15 +507,18 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
|
|
||||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
|
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_RESULT(ss);
|
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_RESULT(ss,F)
|
||||||
|
|
||||||
|
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
@ -509,16 +533,20 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
|||||||
|
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
ZERO_RESULT;
|
|
||||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
ZERO_RESULT; \
|
||||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_RESULT(ss);
|
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_RESULT(ss,F)
|
||||||
|
|
||||||
|
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -532,16 +560,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
|
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
ZERO_RESULT;
|
|
||||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
ZERO_RESULT; \
|
||||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_RESULT(ss);
|
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_RESULT(ss,F)
|
||||||
|
|
||||||
|
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
@ -557,16 +589,20 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
|||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
ZERO_RESULT;
|
|
||||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
ZERO_RESULT; \
|
||||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_RESULT_EXT(ss);
|
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_RESULT_EXT(ss,F)
|
||||||
|
|
||||||
|
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -581,16 +617,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
ZERO_RESULT;
|
|
||||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
ZERO_RESULT; \
|
||||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
HAND_RESULT_EXT(ss);
|
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||||
|
HAND_RESULT_EXT(ss,F)
|
||||||
|
|
||||||
|
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
@ -647,10 +687,130 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
FermionField &out){ assert(0); } \
|
FermionField &out){ assert(0); } \
|
||||||
|
|
||||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
|
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
|
||||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
|
//HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
|
||||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
|
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
|
||||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
|
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplD>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
|
typedef GparityWilsonImplD Impl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
|
HAND_DECLARATIONS(ignore);
|
||||||
|
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
|
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
typedef GparityWilsonImplD Impl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
|
HAND_DECLARATIONS(ignore);
|
||||||
|
|
||||||
|
StencilEntry *SE;
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplD>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
|
typedef GparityWilsonImplD Impl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
|
HAND_DECLARATIONS(ignore);
|
||||||
|
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
|
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
typedef GparityWilsonImplD Impl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
|
HAND_DECLARATIONS(ignore);
|
||||||
|
|
||||||
|
StencilEntry *SE;
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> void
|
||||||
|
WilsonKernels<GparityWilsonImplD>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
|
typedef GparityWilsonImplD Impl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
|
HAND_DECLARATIONS(ignore);
|
||||||
|
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
|
int nmu=0;
|
||||||
|
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
nmu = 0;
|
||||||
|
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||||
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
typedef GparityWilsonImplD Impl;
|
||||||
|
typedef typename Simd::scalar_type S;
|
||||||
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
|
HAND_DECLARATIONS(ignore);
|
||||||
|
|
||||||
|
StencilEntry *SE;
|
||||||
|
int offset,local,perm, ptype;
|
||||||
|
int nmu=0;
|
||||||
|
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
nmu = 0;
|
||||||
|
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////// Wilson ; uses this implementation /////////////////////
|
////////////// Wilson ; uses this implementation /////////////////////
|
||||||
|
|
||||||
#define INSTANTIATE_THEM(A) \
|
#define INSTANTIATE_THEM(A) \
|
||||||
|
@ -33,22 +33,68 @@ using namespace std;
|
|||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
using namespace Grid::QCD;
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
//typedef GparityDomainWallFermionD GparityDiracOp;
|
||||||
|
//typedef DomainWallFermionD StandardDiracOp;
|
||||||
|
//#define DOP_PARAMS
|
||||||
|
|
||||||
|
typedef GparityMobiusFermionD GparityDiracOp;
|
||||||
|
typedef MobiusFermionD StandardDiracOp;
|
||||||
|
#define DOP_PARAMS ,1.5, 0.5
|
||||||
|
|
||||||
|
|
||||||
|
typedef typename GparityDiracOp::FermionField GparityFermionField;
|
||||||
|
typedef typename GparityDiracOp::GaugeField GparityGaugeField;
|
||||||
|
typedef typename GparityFermionField::vector_type vComplexType;
|
||||||
|
|
||||||
|
typedef typename StandardDiracOp::FermionField StandardFermionField;
|
||||||
|
typedef typename StandardDiracOp::GaugeField StandardGaugeField;
|
||||||
|
|
||||||
|
enum{ same_vComplex = std::is_same<vComplexType, typename StandardFermionField::vector_type>::value };
|
||||||
|
static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIMD complex type");
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
const int nu = 3;
|
int nu = 0;
|
||||||
|
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
for(int i=1;i<argc;i++){
|
||||||
|
if(std::string(argv[i]) == "--Gparity-dir"){
|
||||||
|
std::stringstream ss; ss << argv[i+1]; ss >> nu;
|
||||||
|
std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Testing Gparity Dirac operator "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexType::Nsimd()<<std::endl;
|
||||||
|
#ifdef GRID_OMP
|
||||||
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||||
|
#endif
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using UNROLLED Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
const int Ls=4;
|
const int Ls=4;
|
||||||
const int L =4;
|
//const int L =4;
|
||||||
std::vector<int> latt_2f(Nd,L);
|
//std::vector<int> latt_2f(Nd,L);
|
||||||
std::vector<int> latt_1f(Nd,L); latt_1f[nu] = 2*L;
|
|
||||||
|
std::vector<int> latt_2f = GridDefaultLatt();
|
||||||
|
std::vector<int> latt_1f(latt_2f); latt_1f[nu] = 2*latt_2f[nu];
|
||||||
|
int L = latt_2f[nu];
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexType::Nsimd());
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "SIMD layout: ";
|
||||||
|
for(int i=0;i<simd_layout.size();i++) std::cout << simd_layout[i] << " ";
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi(); //node layout
|
std::vector<int> mpi_layout = GridDefaultMpi(); //node layout
|
||||||
|
|
||||||
GridCartesian * UGrid_1f = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout);
|
GridCartesian * UGrid_1f = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout);
|
||||||
@ -67,13 +113,13 @@ int main (int argc, char ** argv)
|
|||||||
GridParallelRNG RNG5_2f(FGrid_2f); RNG5_2f.SeedFixedIntegers(seeds5);
|
GridParallelRNG RNG5_2f(FGrid_2f); RNG5_2f.SeedFixedIntegers(seeds5);
|
||||||
GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
|
GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
LatticeGaugeField Umu_2f(UGrid_2f);
|
GparityGaugeField Umu_2f(UGrid_2f);
|
||||||
SU3::HotConfiguration(RNG4_2f,Umu_2f);
|
SU3::HotConfiguration(RNG4_2f,Umu_2f);
|
||||||
|
|
||||||
LatticeFermion src (FGrid_2f);
|
StandardFermionField src (FGrid_2f);
|
||||||
LatticeFermion tmpsrc(FGrid_2f);
|
StandardFermionField tmpsrc(FGrid_2f);
|
||||||
FermionField src_2f(FGrid_2f);
|
GparityFermionField src_2f(FGrid_2f);
|
||||||
LatticeFermion src_1f(FGrid_1f);
|
StandardFermionField src_1f(FGrid_1f);
|
||||||
|
|
||||||
// Replicate fermion source
|
// Replicate fermion source
|
||||||
random(RNG5_2f,src);
|
random(RNG5_2f,src);
|
||||||
@ -81,8 +127,8 @@ int main (int argc, char ** argv)
|
|||||||
tmpsrc=src*2.0;
|
tmpsrc=src*2.0;
|
||||||
PokeIndex<0>(src_2f,tmpsrc,1);
|
PokeIndex<0>(src_2f,tmpsrc,1);
|
||||||
|
|
||||||
LatticeFermion result_1f(FGrid_1f); result_1f=zero;
|
StandardFermionField result_1f(FGrid_1f); result_1f=zero;
|
||||||
LatticeGaugeField Umu_1f(UGrid_1f);
|
StandardGaugeField Umu_1f(UGrid_1f);
|
||||||
Replicate(Umu_2f,Umu_1f);
|
Replicate(Umu_2f,Umu_1f);
|
||||||
|
|
||||||
//Coordinate grid for reference
|
//Coordinate grid for reference
|
||||||
@ -92,7 +138,7 @@ int main (int argc, char ** argv)
|
|||||||
//Copy-conjugate the gauge field
|
//Copy-conjugate the gauge field
|
||||||
//First C-shift the lattice by Lx/2
|
//First C-shift the lattice by Lx/2
|
||||||
{
|
{
|
||||||
LatticeGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) );
|
StandardGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) );
|
||||||
Umu_1f = where( xcoor_1f >= Integer(L), Umu_shift, Umu_1f );
|
Umu_1f = where( xcoor_1f >= Integer(L), Umu_shift, Umu_1f );
|
||||||
|
|
||||||
// hack test to check the same
|
// hack test to check the same
|
||||||
@ -101,7 +147,7 @@ int main (int argc, char ** argv)
|
|||||||
cout << GridLogMessage << "Umu diff " << norm2(Umu_shift)<<std::endl;
|
cout << GridLogMessage << "Umu diff " << norm2(Umu_shift)<<std::endl;
|
||||||
|
|
||||||
//Make the gauge field antiperiodic in nu-direction
|
//Make the gauge field antiperiodic in nu-direction
|
||||||
LatticeColourMatrix Unu(UGrid_1f);
|
decltype(PeekIndex<LorentzIndex>(Umu_1f,nu)) Unu(UGrid_1f);
|
||||||
Unu = PeekIndex<LorentzIndex>(Umu_1f,nu);
|
Unu = PeekIndex<LorentzIndex>(Umu_1f,nu);
|
||||||
Unu = where(xcoor_1f == Integer(2*L-1), -Unu, Unu);
|
Unu = where(xcoor_1f == Integer(2*L-1), -Unu, Unu);
|
||||||
PokeIndex<LorentzIndex>(Umu_1f,Unu,nu);
|
PokeIndex<LorentzIndex>(Umu_1f,Unu,nu);
|
||||||
@ -115,33 +161,33 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
RealD mass=0.0;
|
RealD mass=0.0;
|
||||||
RealD M5=1.8;
|
RealD M5=1.8;
|
||||||
DomainWallFermionR Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5);
|
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS);
|
||||||
|
|
||||||
LatticeFermion src_o_1f(FrbGrid_1f);
|
StandardFermionField src_o_1f(FrbGrid_1f);
|
||||||
LatticeFermion result_o_1f(FrbGrid_1f);
|
StandardFermionField result_o_1f(FrbGrid_1f);
|
||||||
pickCheckerboard(Odd,src_o_1f,src_1f);
|
pickCheckerboard(Odd,src_o_1f,src_1f);
|
||||||
result_o_1f=zero;
|
result_o_1f=zero;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<StandardDiracOp,StandardFermionField> HermOpEO(Ddwf);
|
||||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
ConjugateGradient<StandardFermionField> CG(1.0e-8,10000);
|
||||||
CG(HermOpEO,src_o_1f,result_o_1f);
|
CG(HermOpEO,src_o_1f,result_o_1f);
|
||||||
|
|
||||||
// const int nu = 3;
|
// const int nu = 3;
|
||||||
std::vector<int> twists(Nd,0);
|
std::vector<int> twists(Nd,0);
|
||||||
twists[nu] = 1;
|
twists[nu] = 1;
|
||||||
GparityDomainWallFermionR::ImplParams params;
|
GparityDiracOp::ImplParams params;
|
||||||
params.twists = twists;
|
params.twists = twists;
|
||||||
GparityDomainWallFermionR GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5,params);
|
GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params);
|
||||||
|
|
||||||
for(int disp=-1;disp<=1;disp+=2)
|
for(int disp=-1;disp<=1;disp+=2)
|
||||||
for(int mu=0;mu<5;mu++)
|
for(int mu=0;mu<5;mu++)
|
||||||
{
|
{
|
||||||
FermionField Dsrc_2f(FGrid_2f);
|
GparityFermionField Dsrc_2f(FGrid_2f);
|
||||||
|
|
||||||
LatticeFermion Dsrc_1f(FGrid_1f);
|
StandardFermionField Dsrc_1f(FGrid_1f);
|
||||||
LatticeFermion Dsrc_2freplica(FGrid_1f);
|
StandardFermionField Dsrc_2freplica(FGrid_1f);
|
||||||
LatticeFermion Dsrc_2freplica0(FGrid_1f);
|
StandardFermionField Dsrc_2freplica0(FGrid_1f);
|
||||||
LatticeFermion Dsrc_2freplica1(FGrid_1f);
|
StandardFermionField Dsrc_2freplica1(FGrid_1f);
|
||||||
|
|
||||||
if ( mu ==0 ) {
|
if ( mu ==0 ) {
|
||||||
std::cout << GridLogMessage<< " Cross checking entire hopping term"<<std::endl;
|
std::cout << GridLogMessage<< " Cross checking entire hopping term"<<std::endl;
|
||||||
@ -156,8 +202,8 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << GridLogMessage << "S norms "<< norm2(src_2f) << " " << norm2(src_1f) <<std::endl;
|
std::cout << GridLogMessage << "S norms "<< norm2(src_2f) << " " << norm2(src_1f) <<std::endl;
|
||||||
std::cout << GridLogMessage << "D norms "<< norm2(Dsrc_2f)<< " " << norm2(Dsrc_1f) <<std::endl;
|
std::cout << GridLogMessage << "D norms "<< norm2(Dsrc_2f)<< " " << norm2(Dsrc_1f) <<std::endl;
|
||||||
|
|
||||||
LatticeFermion Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0);
|
StandardFermionField Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0);
|
||||||
LatticeFermion Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1);
|
StandardFermionField Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1);
|
||||||
|
|
||||||
// Dsrc_2f1 = Dsrc_2f1 - Dsrc_2f0;
|
// Dsrc_2f1 = Dsrc_2f1 - Dsrc_2f0;
|
||||||
// std::cout << GridLogMessage << " Cross check two halves " <<norm2(Dsrc_2f1)<<std::endl;
|
// std::cout << GridLogMessage << " Cross check two halves " <<norm2(Dsrc_2f1)<<std::endl;
|
||||||
@ -174,20 +220,20 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
FermionField chi (FGrid_2f); gaussian(RNG5_2f,chi);
|
GparityFermionField chi (FGrid_2f); gaussian(RNG5_2f,chi);
|
||||||
FermionField phi (FGrid_2f); gaussian(RNG5_2f,phi);
|
GparityFermionField phi (FGrid_2f); gaussian(RNG5_2f,phi);
|
||||||
|
|
||||||
FermionField chi_e (FrbGrid_2f);
|
GparityFermionField chi_e (FrbGrid_2f);
|
||||||
FermionField chi_o (FrbGrid_2f);
|
GparityFermionField chi_o (FrbGrid_2f);
|
||||||
|
|
||||||
FermionField dchi_e (FrbGrid_2f);
|
GparityFermionField dchi_e (FrbGrid_2f);
|
||||||
FermionField dchi_o (FrbGrid_2f);
|
GparityFermionField dchi_o (FrbGrid_2f);
|
||||||
|
|
||||||
FermionField phi_e (FrbGrid_2f);
|
GparityFermionField phi_e (FrbGrid_2f);
|
||||||
FermionField phi_o (FrbGrid_2f);
|
GparityFermionField phi_o (FrbGrid_2f);
|
||||||
|
|
||||||
FermionField dphi_e (FrbGrid_2f);
|
GparityFermionField dphi_e (FrbGrid_2f);
|
||||||
FermionField dphi_o (FrbGrid_2f);
|
GparityFermionField dphi_o (FrbGrid_2f);
|
||||||
|
|
||||||
pickCheckerboard(Even,chi_e,chi);
|
pickCheckerboard(Even,chi_e,chi);
|
||||||
pickCheckerboard(Odd ,chi_o,chi);
|
pickCheckerboard(Odd ,chi_o,chi);
|
||||||
@ -212,14 +258,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
FermionField result_2f(FGrid_2f); result_2f=zero;
|
GparityFermionField result_2f(FGrid_2f); result_2f=zero;
|
||||||
FermionField src_o_2f(FrbGrid_2f);
|
GparityFermionField src_o_2f(FrbGrid_2f);
|
||||||
FermionField result_o_2f(FrbGrid_2f);
|
GparityFermionField result_o_2f(FrbGrid_2f);
|
||||||
pickCheckerboard(Odd,src_o_2f,src_2f);
|
pickCheckerboard(Odd,src_o_2f,src_2f);
|
||||||
result_o_2f=zero;
|
result_o_2f=zero;
|
||||||
|
|
||||||
ConjugateGradient<FermionField> CG2f(1.0e-8,10000);
|
ConjugateGradient<GparityFermionField> CG2f(1.0e-8,10000);
|
||||||
SchurDiagMooeeOperator<GparityDomainWallFermionR,FermionField> HermOpEO2f(GPDdwf);
|
SchurDiagMooeeOperator<GparityDiracOp,GparityFermionField> HermOpEO2f(GPDdwf);
|
||||||
CG2f(HermOpEO2f,src_o_2f,result_o_2f);
|
CG2f(HermOpEO2f,src_o_2f,result_o_2f);
|
||||||
|
|
||||||
std::cout << "2f cb "<<result_o_2f.checkerboard<<std::endl;
|
std::cout << "2f cb "<<result_o_2f.checkerboard<<std::endl;
|
||||||
@ -227,10 +273,10 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
|
std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
|
||||||
|
|
||||||
LatticeFermion res0o (FrbGrid_2f);
|
StandardFermionField res0o (FrbGrid_2f);
|
||||||
LatticeFermion res1o (FrbGrid_2f);
|
StandardFermionField res1o (FrbGrid_2f);
|
||||||
LatticeFermion res0 (FGrid_2f);
|
StandardFermionField res0 (FGrid_2f);
|
||||||
LatticeFermion res1 (FGrid_2f);
|
StandardFermionField res1 (FGrid_2f);
|
||||||
|
|
||||||
res0=zero;
|
res0=zero;
|
||||||
res1=zero;
|
res1=zero;
|
||||||
@ -244,9 +290,9 @@ int main (int argc, char ** argv)
|
|||||||
setCheckerboard(res0,res0o);
|
setCheckerboard(res0,res0o);
|
||||||
setCheckerboard(res1,res1o);
|
setCheckerboard(res1,res1o);
|
||||||
|
|
||||||
LatticeFermion replica (FGrid_1f);
|
StandardFermionField replica (FGrid_1f);
|
||||||
LatticeFermion replica0(FGrid_1f);
|
StandardFermionField replica0(FGrid_1f);
|
||||||
LatticeFermion replica1(FGrid_1f);
|
StandardFermionField replica1(FGrid_1f);
|
||||||
Replicate(res0,replica0);
|
Replicate(res0,replica0);
|
||||||
Replicate(res1,replica1);
|
Replicate(res1,replica1);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user