mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Implemented first, unoptimized version of hand-unrolled G-parity kernels
Improved Test_gparity
This commit is contained in:
parent
383ca7d392
commit
ab50145001
@ -425,6 +425,22 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
namespace GparityWilsonImpl_helper{
|
||||
template<typename Get, typename A,typename B>
|
||||
struct getAB;
|
||||
|
||||
template<typename A,typename B>
|
||||
struct getAB<A,A,B>{
|
||||
static inline A & ref(A &a, B &b){ return a; }
|
||||
};
|
||||
template<typename A,typename B>
|
||||
struct getAB<B,A,B>{
|
||||
static inline B & ref(A &a, B &b){ return b; }
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
template <class S, int Nrepresentation, class Options=CoeffReal>
|
||||
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||
public:
|
||||
@ -462,7 +478,10 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
std::vector<SiteSpinor, alignedAllocator<SiteSpinor> > tmp_full;
|
||||
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > tmp_half;
|
||||
|
||||
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p), tmp_full(GridThread::GetThreads()), tmp_half(GridThread::GetThreads()){};
|
||||
|
||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||
|
||||
@ -538,6 +557,66 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
template<typename SiteSpinorType>
|
||||
void GparityTwistPermute(SiteSpinorType &into, const SiteSpinorType &from, const int direction, const int distance, const int perm, GridBase* grid){
|
||||
typedef typename SiteSpinorType::scalar_object sobj;
|
||||
sobj stmp;
|
||||
std::vector<sobj> vals(grid->Nsimd());
|
||||
extract(from,vals);
|
||||
std::vector<int> icoor;
|
||||
for(int s=0;s<grid->Nsimd();s++){
|
||||
grid->iCoorFromIindex(icoor,s);
|
||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||
|
||||
int permute_lane;
|
||||
if ( distance == 1) {
|
||||
permute_lane = icoor[direction]?1:0;
|
||||
} else {
|
||||
permute_lane = icoor[direction]?0:1;
|
||||
}
|
||||
if(perm) permute_lane = !permute_lane;
|
||||
|
||||
if ( permute_lane ) {
|
||||
stmp(0) = vals[s](1);
|
||||
stmp(1) = vals[s](0);
|
||||
vals[s] = stmp;
|
||||
}
|
||||
}
|
||||
merge(into,vals);
|
||||
}
|
||||
|
||||
|
||||
template<typename SiteSpinorType>
|
||||
const SiteSpinorType & GparityGetChi(int &g, SiteSpinorType const* in, const int dir, const int f, StencilEntry *SE, StencilImpl &st){
|
||||
const int mmu = dir % 4;
|
||||
const int direction = st._directions[dir];
|
||||
const int sl = st._grid->_simd_layout[direction];
|
||||
const int perm = SE->_permute;
|
||||
g = f;
|
||||
|
||||
if(SE->_around_the_world && Params.twists[mmu]){
|
||||
if(sl == 1){ //not SIMD vectorized in G-parity direction so just change the flavor index accessed to implement the twist
|
||||
g = (f+1) % 2;
|
||||
return in[SE->_offset];
|
||||
}else{ //SIMD vectorized in Gparity direction
|
||||
const int me = omp_get_thread_num();
|
||||
const int distance = st._distances[dir];
|
||||
assert(distance == -1 || distance == 1);
|
||||
SiteSpinorType &tmp = GparityWilsonImpl_helper::getAB<SiteSpinorType, SiteSpinor, SiteHalfSpinor>::ref(tmp_full[me], tmp_half[me]);
|
||||
GparityTwistPermute<SiteSpinorType>(tmp, in[SE->_offset], direction, distance, perm, st._grid);
|
||||
return tmp;
|
||||
}
|
||||
}else return in[SE->_offset];
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
conformable(Uds._grid,GaugeGrid);
|
||||
|
@ -30,60 +30,77 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHIMU \
|
||||
{const SiteSpinor & ref (in._odata[offset]); \
|
||||
Chimu_00=ref()(0)(0);\
|
||||
Chimu_01=ref()(0)(1);\
|
||||
Chimu_02=ref()(0)(2);\
|
||||
Chimu_10=ref()(1)(0);\
|
||||
Chimu_11=ref()(1)(1);\
|
||||
Chimu_12=ref()(1)(2);\
|
||||
Chimu_20=ref()(2)(0);\
|
||||
Chimu_21=ref()(2)(1);\
|
||||
Chimu_22=ref()(2)(2);\
|
||||
Chimu_30=ref()(3)(0);\
|
||||
Chimu_31=ref()(3)(1);\
|
||||
Chimu_32=ref()(3)(2);}
|
||||
#define LOAD_CHIMU_BODY(F) \
|
||||
Chimu_00=ref(F)(0)(0); \
|
||||
Chimu_01=ref(F)(0)(1); \
|
||||
Chimu_02=ref(F)(0)(2); \
|
||||
Chimu_10=ref(F)(1)(0); \
|
||||
Chimu_11=ref(F)(1)(1); \
|
||||
Chimu_12=ref(F)(1)(2); \
|
||||
Chimu_20=ref(F)(2)(0); \
|
||||
Chimu_21=ref(F)(2)(1); \
|
||||
Chimu_22=ref(F)(2)(2); \
|
||||
Chimu_30=ref(F)(3)(0); \
|
||||
Chimu_31=ref(F)(3)(1); \
|
||||
Chimu_32=ref(F)(3)(2)
|
||||
|
||||
#define LOAD_CHIMU(DIR,F) \
|
||||
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
|
||||
|
||||
#define LOAD_CHIMU_GPARITY(DIR,F) \
|
||||
{ int g; const SiteSpinor & ref = GparityGetChi<SiteSpinor>(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); }
|
||||
|
||||
#define LOAD_CHI_BODY(F) \
|
||||
Chi_00 = ref(F)(0)(0);\
|
||||
Chi_01 = ref(F)(0)(1);\
|
||||
Chi_02 = ref(F)(0)(2);\
|
||||
Chi_10 = ref(F)(1)(0);\
|
||||
Chi_11 = ref(F)(1)(1);\
|
||||
Chi_12 = ref(F)(1)(2)
|
||||
|
||||
#define LOAD_CHI(DIR,F) \
|
||||
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
|
||||
|
||||
#define LOAD_CHI_GPARITY(DIR,F) \
|
||||
{ int g; const SiteHalfSpinor &ref = GparityGetChi<SiteHalfSpinor>(g,buf,DIR,F,SE,st); LOAD_CHI_BODY(g); }
|
||||
|
||||
#define LOAD_CHI\
|
||||
{const SiteHalfSpinor &ref(buf[offset]); \
|
||||
Chi_00 = ref()(0)(0);\
|
||||
Chi_01 = ref()(0)(1);\
|
||||
Chi_02 = ref()(0)(2);\
|
||||
Chi_10 = ref()(1)(0);\
|
||||
Chi_11 = ref()(1)(1);\
|
||||
Chi_12 = ref()(1)(2);}
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN(A)\
|
||||
{auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00;\
|
||||
UChi_10 = U_00*Chi_10;\
|
||||
UChi_01 = U_10*Chi_00;\
|
||||
UChi_11 = U_10*Chi_10;\
|
||||
UChi_02 = U_20*Chi_00;\
|
||||
UChi_12 = U_20*Chi_10;\
|
||||
UChi_00+= U_01*Chi_01;\
|
||||
UChi_10+= U_01*Chi_11;\
|
||||
UChi_01+= U_11*Chi_01;\
|
||||
UChi_11+= U_11*Chi_11;\
|
||||
UChi_02+= U_21*Chi_01;\
|
||||
UChi_12+= U_21*Chi_11;\
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02;\
|
||||
UChi_10+= U_00*Chi_12;\
|
||||
UChi_01+= U_10*Chi_02;\
|
||||
UChi_11+= U_10*Chi_12;\
|
||||
UChi_02+= U_20*Chi_02;\
|
||||
UChi_12+= U_20*Chi_12;}
|
||||
#define MULT_2SPIN_BODY \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00; \
|
||||
UChi_10 = U_00*Chi_10; \
|
||||
UChi_01 = U_10*Chi_00; \
|
||||
UChi_11 = U_10*Chi_10; \
|
||||
UChi_02 = U_20*Chi_00; \
|
||||
UChi_12 = U_20*Chi_10; \
|
||||
UChi_00+= U_01*Chi_01; \
|
||||
UChi_10+= U_01*Chi_11; \
|
||||
UChi_01+= U_11*Chi_01; \
|
||||
UChi_11+= U_11*Chi_11; \
|
||||
UChi_02+= U_21*Chi_01; \
|
||||
UChi_12+= U_21*Chi_11; \
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02; \
|
||||
UChi_10+= U_00*Chi_12; \
|
||||
UChi_01+= U_10*Chi_02; \
|
||||
UChi_11+= U_10*Chi_12; \
|
||||
UChi_02+= U_20*Chi_02; \
|
||||
UChi_12+= U_20*Chi_12
|
||||
|
||||
|
||||
#define MULT_2SPIN(A,F) \
|
||||
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
|
||||
|
||||
#define MULT_2SPIN_GPARITY(A,F) \
|
||||
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
@ -307,84 +324,85 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
result_31-= UChi_11; \
|
||||
result_32-= UChi_12;
|
||||
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
|
||||
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU; \
|
||||
LOAD_CHIMU_IMPL(DIR,F); \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI; \
|
||||
LOAD_CHI_IMPL(DIR,F); \
|
||||
} \
|
||||
MULT_2SPIN(DIR); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON;
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHIMU; \
|
||||
LOAD_CHIMU_IMPL(DIR,F); \
|
||||
PROJ; \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(PERM); \
|
||||
} \
|
||||
} else if ( st.same_node[DIR] ) { \
|
||||
LOAD_CHI; \
|
||||
LOAD_CHI_IMPL(DIR,F); \
|
||||
} \
|
||||
if (local || st.same_node[DIR] ) { \
|
||||
MULT_2SPIN(DIR); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON; \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
|
||||
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
SE=st.GetEntry(ptype,DIR,ss); \
|
||||
offset = SE->_offset; \
|
||||
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
|
||||
LOAD_CHI; \
|
||||
MULT_2SPIN(DIR); \
|
||||
LOAD_CHI_IMPL(DIR,F); \
|
||||
MULT_2SPIN_IMPL(DIR,F); \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
||||
#define HAND_RESULT(ss) \
|
||||
#define HAND_RESULT(ss,F) \
|
||||
{ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
vstream(ref()(0)(0),result_00); \
|
||||
vstream(ref()(0)(1),result_01); \
|
||||
vstream(ref()(0)(2),result_02); \
|
||||
vstream(ref()(1)(0),result_10); \
|
||||
vstream(ref()(1)(1),result_11); \
|
||||
vstream(ref()(1)(2),result_12); \
|
||||
vstream(ref()(2)(0),result_20); \
|
||||
vstream(ref()(2)(1),result_21); \
|
||||
vstream(ref()(2)(2),result_22); \
|
||||
vstream(ref()(3)(0),result_30); \
|
||||
vstream(ref()(3)(1),result_31); \
|
||||
vstream(ref()(3)(2),result_32); \
|
||||
vstream(ref(F)(0)(0),result_00); \
|
||||
vstream(ref(F)(0)(1),result_01); \
|
||||
vstream(ref(F)(0)(2),result_02); \
|
||||
vstream(ref(F)(1)(0),result_10); \
|
||||
vstream(ref(F)(1)(1),result_11); \
|
||||
vstream(ref(F)(1)(2),result_12); \
|
||||
vstream(ref(F)(2)(0),result_20); \
|
||||
vstream(ref(F)(2)(1),result_21); \
|
||||
vstream(ref(F)(2)(2),result_22); \
|
||||
vstream(ref(F)(3)(0),result_30); \
|
||||
vstream(ref(F)(3)(1),result_31); \
|
||||
vstream(ref(F)(3)(2),result_32); \
|
||||
}
|
||||
|
||||
#define HAND_RESULT_EXT(ss) \
|
||||
#define HAND_RESULT_EXT(ss,F) \
|
||||
if (nmu){ \
|
||||
SiteSpinor & ref (out._odata[ss]); \
|
||||
ref()(0)(0)+=result_00; \
|
||||
ref()(0)(1)+=result_01; \
|
||||
ref()(0)(2)+=result_02; \
|
||||
ref()(1)(0)+=result_10; \
|
||||
ref()(1)(1)+=result_11; \
|
||||
ref()(1)(2)+=result_12; \
|
||||
ref()(2)(0)+=result_20; \
|
||||
ref()(2)(1)+=result_21; \
|
||||
ref()(2)(2)+=result_22; \
|
||||
ref()(3)(0)+=result_30; \
|
||||
ref()(3)(1)+=result_31; \
|
||||
ref()(3)(2)+=result_32; \
|
||||
ref(F)(0)(0)+=result_00; \
|
||||
ref(F)(0)(1)+=result_01; \
|
||||
ref(F)(0)(2)+=result_02; \
|
||||
ref(F)(1)(0)+=result_10; \
|
||||
ref(F)(1)(1)+=result_11; \
|
||||
ref(F)(1)(2)+=result_12; \
|
||||
ref(F)(2)(0)+=result_20; \
|
||||
ref(F)(2)(1)+=result_21; \
|
||||
ref(F)(2)(2)+=result_22; \
|
||||
ref(F)(3)(0)+=result_30; \
|
||||
ref(F)(3)(1)+=result_31; \
|
||||
ref(F)(3)(2)+=result_32; \
|
||||
}
|
||||
|
||||
|
||||
@ -463,15 +481,18 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -485,16 +506,19 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
@ -509,16 +533,20 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -532,16 +560,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
@ -557,16 +589,20 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -581,16 +617,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
ZERO_RESULT;
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||
HAND_RESULT_EXT(ss);
|
||||
|
||||
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
|
||||
ZERO_RESULT; \
|
||||
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
|
||||
HAND_RESULT_EXT(ss,F)
|
||||
|
||||
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
@ -647,10 +687,130 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
FermionField &out){ assert(0); } \
|
||||
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
|
||||
//HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
|
||||
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplD>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef GparityWilsonImplD Impl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef GparityWilsonImplD Impl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
}
|
||||
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplD>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef GparityWilsonImplD Impl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef GparityWilsonImplD Impl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
}
|
||||
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplD>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef GparityWilsonImplD Impl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
int offset,local,perm, ptype;
|
||||
StencilEntry *SE;
|
||||
int nmu=0;
|
||||
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
nmu = 0;
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
}
|
||||
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef GparityWilsonImplD Impl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
HAND_DECLARATIONS(ignore);
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
int nmu=0;
|
||||
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
nmu = 0;
|
||||
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
|
@ -33,22 +33,68 @@ using namespace std;
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
typedef typename GparityDomainWallFermionR::FermionField FermionField;
|
||||
//typedef GparityDomainWallFermionD GparityDiracOp;
|
||||
//typedef DomainWallFermionD StandardDiracOp;
|
||||
//#define DOP_PARAMS
|
||||
|
||||
typedef GparityMobiusFermionD GparityDiracOp;
|
||||
typedef MobiusFermionD StandardDiracOp;
|
||||
#define DOP_PARAMS ,1.5, 0.5
|
||||
|
||||
|
||||
typedef typename GparityDiracOp::FermionField GparityFermionField;
|
||||
typedef typename GparityDiracOp::GaugeField GparityGaugeField;
|
||||
typedef typename GparityFermionField::vector_type vComplexType;
|
||||
|
||||
typedef typename StandardDiracOp::FermionField StandardFermionField;
|
||||
typedef typename StandardDiracOp::GaugeField StandardGaugeField;
|
||||
|
||||
enum{ same_vComplex = std::is_same<vComplexType, typename StandardFermionField::vector_type>::value };
|
||||
static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIMD complex type");
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
const int nu = 3;
|
||||
int nu = 0;
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
for(int i=1;i<argc;i++){
|
||||
if(std::string(argv[i]) == "--Gparity-dir"){
|
||||
std::stringstream ss; ss << argv[i+1]; ss >> nu;
|
||||
std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
std::cout << GridLogMessage<< "* Testing Gparity Dirac operator "<<std::endl;
|
||||
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexType::Nsimd()<<std::endl;
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
|
||||
#endif
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using UNROLLED Nc=3 WilsonKernels" <<std::endl;
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||
|
||||
const int Ls=4;
|
||||
const int L =4;
|
||||
std::vector<int> latt_2f(Nd,L);
|
||||
std::vector<int> latt_1f(Nd,L); latt_1f[nu] = 2*L;
|
||||
//const int L =4;
|
||||
//std::vector<int> latt_2f(Nd,L);
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||
std::vector<int> latt_2f = GridDefaultLatt();
|
||||
std::vector<int> latt_1f(latt_2f); latt_1f[nu] = 2*latt_2f[nu];
|
||||
int L = latt_2f[nu];
|
||||
|
||||
|
||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexType::Nsimd());
|
||||
|
||||
std::cout << GridLogMessage << "SIMD layout: ";
|
||||
for(int i=0;i<simd_layout.size();i++) std::cout << simd_layout[i] << " ";
|
||||
std::cout << std::endl;
|
||||
|
||||
std::vector<int> mpi_layout = GridDefaultMpi(); //node layout
|
||||
|
||||
GridCartesian * UGrid_1f = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout);
|
||||
@ -67,13 +113,13 @@ int main (int argc, char ** argv)
|
||||
GridParallelRNG RNG5_2f(FGrid_2f); RNG5_2f.SeedFixedIntegers(seeds5);
|
||||
GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu_2f(UGrid_2f);
|
||||
GparityGaugeField Umu_2f(UGrid_2f);
|
||||
SU3::HotConfiguration(RNG4_2f,Umu_2f);
|
||||
|
||||
LatticeFermion src (FGrid_2f);
|
||||
LatticeFermion tmpsrc(FGrid_2f);
|
||||
FermionField src_2f(FGrid_2f);
|
||||
LatticeFermion src_1f(FGrid_1f);
|
||||
StandardFermionField src (FGrid_2f);
|
||||
StandardFermionField tmpsrc(FGrid_2f);
|
||||
GparityFermionField src_2f(FGrid_2f);
|
||||
StandardFermionField src_1f(FGrid_1f);
|
||||
|
||||
// Replicate fermion source
|
||||
random(RNG5_2f,src);
|
||||
@ -81,8 +127,8 @@ int main (int argc, char ** argv)
|
||||
tmpsrc=src*2.0;
|
||||
PokeIndex<0>(src_2f,tmpsrc,1);
|
||||
|
||||
LatticeFermion result_1f(FGrid_1f); result_1f=zero;
|
||||
LatticeGaugeField Umu_1f(UGrid_1f);
|
||||
StandardFermionField result_1f(FGrid_1f); result_1f=zero;
|
||||
StandardGaugeField Umu_1f(UGrid_1f);
|
||||
Replicate(Umu_2f,Umu_1f);
|
||||
|
||||
//Coordinate grid for reference
|
||||
@ -92,7 +138,7 @@ int main (int argc, char ** argv)
|
||||
//Copy-conjugate the gauge field
|
||||
//First C-shift the lattice by Lx/2
|
||||
{
|
||||
LatticeGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) );
|
||||
StandardGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) );
|
||||
Umu_1f = where( xcoor_1f >= Integer(L), Umu_shift, Umu_1f );
|
||||
|
||||
// hack test to check the same
|
||||
@ -101,7 +147,7 @@ int main (int argc, char ** argv)
|
||||
cout << GridLogMessage << "Umu diff " << norm2(Umu_shift)<<std::endl;
|
||||
|
||||
//Make the gauge field antiperiodic in nu-direction
|
||||
LatticeColourMatrix Unu(UGrid_1f);
|
||||
decltype(PeekIndex<LorentzIndex>(Umu_1f,nu)) Unu(UGrid_1f);
|
||||
Unu = PeekIndex<LorentzIndex>(Umu_1f,nu);
|
||||
Unu = where(xcoor_1f == Integer(2*L-1), -Unu, Unu);
|
||||
PokeIndex<LorentzIndex>(Umu_1f,Unu,nu);
|
||||
@ -115,33 +161,33 @@ int main (int argc, char ** argv)
|
||||
|
||||
RealD mass=0.0;
|
||||
RealD M5=1.8;
|
||||
DomainWallFermionR Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5);
|
||||
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS);
|
||||
|
||||
LatticeFermion src_o_1f(FrbGrid_1f);
|
||||
LatticeFermion result_o_1f(FrbGrid_1f);
|
||||
StandardFermionField src_o_1f(FrbGrid_1f);
|
||||
StandardFermionField result_o_1f(FrbGrid_1f);
|
||||
pickCheckerboard(Odd,src_o_1f,src_1f);
|
||||
result_o_1f=zero;
|
||||
|
||||
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
|
||||
SchurDiagMooeeOperator<StandardDiracOp,StandardFermionField> HermOpEO(Ddwf);
|
||||
ConjugateGradient<StandardFermionField> CG(1.0e-8,10000);
|
||||
CG(HermOpEO,src_o_1f,result_o_1f);
|
||||
|
||||
// const int nu = 3;
|
||||
std::vector<int> twists(Nd,0);
|
||||
twists[nu] = 1;
|
||||
GparityDomainWallFermionR::ImplParams params;
|
||||
GparityDiracOp::ImplParams params;
|
||||
params.twists = twists;
|
||||
GparityDomainWallFermionR GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5,params);
|
||||
GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params);
|
||||
|
||||
for(int disp=-1;disp<=1;disp+=2)
|
||||
for(int mu=0;mu<5;mu++)
|
||||
{
|
||||
FermionField Dsrc_2f(FGrid_2f);
|
||||
GparityFermionField Dsrc_2f(FGrid_2f);
|
||||
|
||||
LatticeFermion Dsrc_1f(FGrid_1f);
|
||||
LatticeFermion Dsrc_2freplica(FGrid_1f);
|
||||
LatticeFermion Dsrc_2freplica0(FGrid_1f);
|
||||
LatticeFermion Dsrc_2freplica1(FGrid_1f);
|
||||
StandardFermionField Dsrc_1f(FGrid_1f);
|
||||
StandardFermionField Dsrc_2freplica(FGrid_1f);
|
||||
StandardFermionField Dsrc_2freplica0(FGrid_1f);
|
||||
StandardFermionField Dsrc_2freplica1(FGrid_1f);
|
||||
|
||||
if ( mu ==0 ) {
|
||||
std::cout << GridLogMessage<< " Cross checking entire hopping term"<<std::endl;
|
||||
@ -156,8 +202,8 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage << "S norms "<< norm2(src_2f) << " " << norm2(src_1f) <<std::endl;
|
||||
std::cout << GridLogMessage << "D norms "<< norm2(Dsrc_2f)<< " " << norm2(Dsrc_1f) <<std::endl;
|
||||
|
||||
LatticeFermion Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0);
|
||||
LatticeFermion Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1);
|
||||
StandardFermionField Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0);
|
||||
StandardFermionField Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1);
|
||||
|
||||
// Dsrc_2f1 = Dsrc_2f1 - Dsrc_2f0;
|
||||
// std::cout << GridLogMessage << " Cross check two halves " <<norm2(Dsrc_2f1)<<std::endl;
|
||||
@ -174,20 +220,20 @@ int main (int argc, char ** argv)
|
||||
}
|
||||
|
||||
{
|
||||
FermionField chi (FGrid_2f); gaussian(RNG5_2f,chi);
|
||||
FermionField phi (FGrid_2f); gaussian(RNG5_2f,phi);
|
||||
GparityFermionField chi (FGrid_2f); gaussian(RNG5_2f,chi);
|
||||
GparityFermionField phi (FGrid_2f); gaussian(RNG5_2f,phi);
|
||||
|
||||
FermionField chi_e (FrbGrid_2f);
|
||||
FermionField chi_o (FrbGrid_2f);
|
||||
GparityFermionField chi_e (FrbGrid_2f);
|
||||
GparityFermionField chi_o (FrbGrid_2f);
|
||||
|
||||
FermionField dchi_e (FrbGrid_2f);
|
||||
FermionField dchi_o (FrbGrid_2f);
|
||||
GparityFermionField dchi_e (FrbGrid_2f);
|
||||
GparityFermionField dchi_o (FrbGrid_2f);
|
||||
|
||||
FermionField phi_e (FrbGrid_2f);
|
||||
FermionField phi_o (FrbGrid_2f);
|
||||
GparityFermionField phi_e (FrbGrid_2f);
|
||||
GparityFermionField phi_o (FrbGrid_2f);
|
||||
|
||||
FermionField dphi_e (FrbGrid_2f);
|
||||
FermionField dphi_o (FrbGrid_2f);
|
||||
GparityFermionField dphi_e (FrbGrid_2f);
|
||||
GparityFermionField dphi_o (FrbGrid_2f);
|
||||
|
||||
pickCheckerboard(Even,chi_e,chi);
|
||||
pickCheckerboard(Odd ,chi_o,chi);
|
||||
@ -212,14 +258,14 @@ int main (int argc, char ** argv)
|
||||
|
||||
}
|
||||
|
||||
FermionField result_2f(FGrid_2f); result_2f=zero;
|
||||
FermionField src_o_2f(FrbGrid_2f);
|
||||
FermionField result_o_2f(FrbGrid_2f);
|
||||
GparityFermionField result_2f(FGrid_2f); result_2f=zero;
|
||||
GparityFermionField src_o_2f(FrbGrid_2f);
|
||||
GparityFermionField result_o_2f(FrbGrid_2f);
|
||||
pickCheckerboard(Odd,src_o_2f,src_2f);
|
||||
result_o_2f=zero;
|
||||
|
||||
ConjugateGradient<FermionField> CG2f(1.0e-8,10000);
|
||||
SchurDiagMooeeOperator<GparityDomainWallFermionR,FermionField> HermOpEO2f(GPDdwf);
|
||||
ConjugateGradient<GparityFermionField> CG2f(1.0e-8,10000);
|
||||
SchurDiagMooeeOperator<GparityDiracOp,GparityFermionField> HermOpEO2f(GPDdwf);
|
||||
CG2f(HermOpEO2f,src_o_2f,result_o_2f);
|
||||
|
||||
std::cout << "2f cb "<<result_o_2f.checkerboard<<std::endl;
|
||||
@ -227,10 +273,10 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
|
||||
|
||||
LatticeFermion res0o (FrbGrid_2f);
|
||||
LatticeFermion res1o (FrbGrid_2f);
|
||||
LatticeFermion res0 (FGrid_2f);
|
||||
LatticeFermion res1 (FGrid_2f);
|
||||
StandardFermionField res0o (FrbGrid_2f);
|
||||
StandardFermionField res1o (FrbGrid_2f);
|
||||
StandardFermionField res0 (FGrid_2f);
|
||||
StandardFermionField res1 (FGrid_2f);
|
||||
|
||||
res0=zero;
|
||||
res1=zero;
|
||||
@ -244,9 +290,9 @@ int main (int argc, char ** argv)
|
||||
setCheckerboard(res0,res0o);
|
||||
setCheckerboard(res1,res1o);
|
||||
|
||||
LatticeFermion replica (FGrid_1f);
|
||||
LatticeFermion replica0(FGrid_1f);
|
||||
LatticeFermion replica1(FGrid_1f);
|
||||
StandardFermionField replica (FGrid_1f);
|
||||
StandardFermionField replica0(FGrid_1f);
|
||||
StandardFermionField replica1(FGrid_1f);
|
||||
Replicate(res0,replica0);
|
||||
Replicate(res1,replica1);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user