1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Implemented first, unoptimized version of hand-unrolled G-parity kernels

Improved Test_gparity
This commit is contained in:
Christopher Kelly 2017-08-22 17:12:25 -04:00
parent 383ca7d392
commit ab50145001
3 changed files with 485 additions and 200 deletions

View File

@ -425,6 +425,22 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only? what about C*?
////////////////////////////////////////////////////////////////////////////////////////
namespace GparityWilsonImpl_helper{
template<typename Get, typename A,typename B>
struct getAB;
template<typename A,typename B>
struct getAB<A,A,B>{
static inline A & ref(A &a, B &b){ return a; }
};
template<typename A,typename B>
struct getAB<B,A,B>{
static inline B & ref(A &a, B &b){ return b; }
};
};
template <class S, int Nrepresentation, class Options=CoeffReal>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
public:
@ -462,7 +478,10 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
ImplParams Params;
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
std::vector<SiteSpinor, alignedAllocator<SiteSpinor> > tmp_full;
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > tmp_half;
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p), tmp_full(GridThread::GetThreads()), tmp_half(GridThread::GetThreads()){};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
@ -538,6 +557,66 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
}
template <class ref>
inline void loadLinkElement(Simd &reg, ref &memory) {
reg = memory;
}
template<typename SiteSpinorType>
void GparityTwistPermute(SiteSpinorType &into, const SiteSpinorType &from, const int direction, const int distance, const int perm, GridBase* grid){
typedef typename SiteSpinorType::scalar_object sobj;
sobj stmp;
std::vector<sobj> vals(grid->Nsimd());
extract(from,vals);
std::vector<int> icoor;
for(int s=0;s<grid->Nsimd();s++){
grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane;
if ( distance == 1) {
permute_lane = icoor[direction]?1:0;
} else {
permute_lane = icoor[direction]?0:1;
}
if(perm) permute_lane = !permute_lane;
if ( permute_lane ) {
stmp(0) = vals[s](1);
stmp(1) = vals[s](0);
vals[s] = stmp;
}
}
merge(into,vals);
}
template<typename SiteSpinorType>
const SiteSpinorType & GparityGetChi(int &g, SiteSpinorType const* in, const int dir, const int f, StencilEntry *SE, StencilImpl &st){
const int mmu = dir % 4;
const int direction = st._directions[dir];
const int sl = st._grid->_simd_layout[direction];
const int perm = SE->_permute;
g = f;
if(SE->_around_the_world && Params.twists[mmu]){
if(sl == 1){ //not SIMD vectorized in G-parity direction so just change the flavor index accessed to implement the twist
g = (f+1) % 2;
return in[SE->_offset];
}else{ //SIMD vectorized in Gparity direction
const int me = omp_get_thread_num();
const int distance = st._distances[dir];
assert(distance == -1 || distance == 1);
SiteSpinorType &tmp = GparityWilsonImpl_helper::getAB<SiteSpinorType, SiteSpinor, SiteHalfSpinor>::ref(tmp_full[me], tmp_half[me]);
GparityTwistPermute<SiteSpinorType>(tmp, in[SE->_offset], direction, distance, perm, st._grid);
return tmp;
}
}else return in[SE->_offset];
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);

View File

@ -30,60 +30,77 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define REGISTER
#define LOAD_CHIMU \
{const SiteSpinor & ref (in._odata[offset]); \
Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\
Chimu_02=ref()(0)(2);\
Chimu_10=ref()(1)(0);\
Chimu_11=ref()(1)(1);\
Chimu_12=ref()(1)(2);\
Chimu_20=ref()(2)(0);\
Chimu_21=ref()(2)(1);\
Chimu_22=ref()(2)(2);\
Chimu_30=ref()(3)(0);\
Chimu_31=ref()(3)(1);\
Chimu_32=ref()(3)(2);}
#define LOAD_CHIMU_BODY(F) \
Chimu_00=ref(F)(0)(0); \
Chimu_01=ref(F)(0)(1); \
Chimu_02=ref(F)(0)(2); \
Chimu_10=ref(F)(1)(0); \
Chimu_11=ref(F)(1)(1); \
Chimu_12=ref(F)(1)(2); \
Chimu_20=ref(F)(2)(0); \
Chimu_21=ref(F)(2)(1); \
Chimu_22=ref(F)(2)(2); \
Chimu_30=ref(F)(3)(0); \
Chimu_31=ref(F)(3)(1); \
Chimu_32=ref(F)(3)(2)
#define LOAD_CHIMU(DIR,F) \
{ const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); }
#define LOAD_CHIMU_GPARITY(DIR,F) \
{ int g; const SiteSpinor & ref = GparityGetChi<SiteSpinor>(g,in._odata.data(),DIR,F,SE,st); LOAD_CHIMU_BODY(g); }
#define LOAD_CHI_BODY(F) \
Chi_00 = ref(F)(0)(0);\
Chi_01 = ref(F)(0)(1);\
Chi_02 = ref(F)(0)(2);\
Chi_10 = ref(F)(1)(0);\
Chi_11 = ref(F)(1)(1);\
Chi_12 = ref(F)(1)(2)
#define LOAD_CHI(DIR,F) \
{const SiteHalfSpinor &ref(buf[offset]); LOAD_CHI_BODY(F); }
#define LOAD_CHI_GPARITY(DIR,F) \
{ int g; const SiteHalfSpinor &ref = GparityGetChi<SiteHalfSpinor>(g,buf,DIR,F,SE,st); LOAD_CHI_BODY(g); }
#define LOAD_CHI\
{const SiteHalfSpinor &ref(buf[offset]); \
Chi_00 = ref()(0)(0);\
Chi_01 = ref()(0)(1);\
Chi_02 = ref()(0)(2);\
Chi_10 = ref()(1)(0);\
Chi_11 = ref()(1)(1);\
Chi_12 = ref()(1)(2);}
// To splat or not to splat depends on the implementation
#define MULT_2SPIN(A)\
{auto & ref(U._odata[sU](A)); \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
Impl::loadLinkElement(U_01,ref()(0,1)); \
Impl::loadLinkElement(U_11,ref()(1,1)); \
Impl::loadLinkElement(U_21,ref()(2,1)); \
UChi_00 = U_00*Chi_00;\
UChi_10 = U_00*Chi_10;\
UChi_01 = U_10*Chi_00;\
UChi_11 = U_10*Chi_10;\
UChi_02 = U_20*Chi_00;\
UChi_12 = U_20*Chi_10;\
UChi_00+= U_01*Chi_01;\
UChi_10+= U_01*Chi_11;\
UChi_01+= U_11*Chi_01;\
UChi_11+= U_11*Chi_11;\
UChi_02+= U_21*Chi_01;\
UChi_12+= U_21*Chi_11;\
Impl::loadLinkElement(U_00,ref()(0,2)); \
Impl::loadLinkElement(U_10,ref()(1,2)); \
Impl::loadLinkElement(U_20,ref()(2,2)); \
UChi_00+= U_00*Chi_02;\
UChi_10+= U_00*Chi_12;\
UChi_01+= U_10*Chi_02;\
UChi_11+= U_10*Chi_12;\
UChi_02+= U_20*Chi_02;\
UChi_12+= U_20*Chi_12;}
#define MULT_2SPIN_BODY \
Impl::loadLinkElement(U_00,ref()(0,0)); \
Impl::loadLinkElement(U_10,ref()(1,0)); \
Impl::loadLinkElement(U_20,ref()(2,0)); \
Impl::loadLinkElement(U_01,ref()(0,1)); \
Impl::loadLinkElement(U_11,ref()(1,1)); \
Impl::loadLinkElement(U_21,ref()(2,1)); \
UChi_00 = U_00*Chi_00; \
UChi_10 = U_00*Chi_10; \
UChi_01 = U_10*Chi_00; \
UChi_11 = U_10*Chi_10; \
UChi_02 = U_20*Chi_00; \
UChi_12 = U_20*Chi_10; \
UChi_00+= U_01*Chi_01; \
UChi_10+= U_01*Chi_11; \
UChi_01+= U_11*Chi_01; \
UChi_11+= U_11*Chi_11; \
UChi_02+= U_21*Chi_01; \
UChi_12+= U_21*Chi_11; \
Impl::loadLinkElement(U_00,ref()(0,2)); \
Impl::loadLinkElement(U_10,ref()(1,2)); \
Impl::loadLinkElement(U_20,ref()(2,2)); \
UChi_00+= U_00*Chi_02; \
UChi_10+= U_00*Chi_12; \
UChi_01+= U_10*Chi_02; \
UChi_11+= U_10*Chi_12; \
UChi_02+= U_20*Chi_02; \
UChi_12+= U_20*Chi_12
#define MULT_2SPIN(A,F) \
{auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; }
#define MULT_2SPIN_GPARITY(A,F) \
{auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; }
#define PERMUTE_DIR(dir) \
@ -307,84 +324,85 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
result_31-= UChi_11; \
result_32-= UChi_12;
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON) \
#define HAND_STENCIL_LEG(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHIMU; \
LOAD_CHIMU_IMPL(DIR,F); \
PROJ; \
if ( perm) { \
PERMUTE_DIR(PERM); \
} \
} else { \
LOAD_CHI; \
LOAD_CHI_IMPL(DIR,F); \
} \
MULT_2SPIN(DIR); \
MULT_2SPIN_IMPL(DIR,F); \
RECON;
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON) \
#define HAND_STENCIL_LEG_INT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
local = SE->_is_local; \
perm = SE->_permute; \
if ( local ) { \
LOAD_CHIMU; \
LOAD_CHIMU_IMPL(DIR,F); \
PROJ; \
if ( perm) { \
PERMUTE_DIR(PERM); \
} \
} else if ( st.same_node[DIR] ) { \
LOAD_CHI; \
LOAD_CHI_IMPL(DIR,F); \
} \
if (local || st.same_node[DIR] ) { \
MULT_2SPIN(DIR); \
MULT_2SPIN_IMPL(DIR,F); \
RECON; \
}
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON) \
#define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
SE=st.GetEntry(ptype,DIR,ss); \
offset = SE->_offset; \
if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \
LOAD_CHI; \
MULT_2SPIN(DIR); \
LOAD_CHI_IMPL(DIR,F); \
MULT_2SPIN_IMPL(DIR,F); \
RECON; \
nmu++; \
}
#define HAND_RESULT(ss) \
#define HAND_RESULT(ss,F) \
{ \
SiteSpinor & ref (out._odata[ss]); \
vstream(ref()(0)(0),result_00); \
vstream(ref()(0)(1),result_01); \
vstream(ref()(0)(2),result_02); \
vstream(ref()(1)(0),result_10); \
vstream(ref()(1)(1),result_11); \
vstream(ref()(1)(2),result_12); \
vstream(ref()(2)(0),result_20); \
vstream(ref()(2)(1),result_21); \
vstream(ref()(2)(2),result_22); \
vstream(ref()(3)(0),result_30); \
vstream(ref()(3)(1),result_31); \
vstream(ref()(3)(2),result_32); \
vstream(ref(F)(0)(0),result_00); \
vstream(ref(F)(0)(1),result_01); \
vstream(ref(F)(0)(2),result_02); \
vstream(ref(F)(1)(0),result_10); \
vstream(ref(F)(1)(1),result_11); \
vstream(ref(F)(1)(2),result_12); \
vstream(ref(F)(2)(0),result_20); \
vstream(ref(F)(2)(1),result_21); \
vstream(ref(F)(2)(2),result_22); \
vstream(ref(F)(3)(0),result_30); \
vstream(ref(F)(3)(1),result_31); \
vstream(ref(F)(3)(2),result_32); \
}
#define HAND_RESULT_EXT(ss) \
#define HAND_RESULT_EXT(ss,F) \
if (nmu){ \
SiteSpinor & ref (out._odata[ss]); \
ref()(0)(0)+=result_00; \
ref()(0)(1)+=result_01; \
ref()(0)(2)+=result_02; \
ref()(1)(0)+=result_10; \
ref()(1)(1)+=result_11; \
ref()(1)(2)+=result_12; \
ref()(2)(0)+=result_20; \
ref()(2)(1)+=result_21; \
ref()(2)(2)+=result_22; \
ref()(3)(0)+=result_30; \
ref()(3)(1)+=result_31; \
ref()(3)(2)+=result_32; \
ref(F)(0)(0)+=result_00; \
ref(F)(0)(1)+=result_01; \
ref(F)(0)(2)+=result_02; \
ref(F)(1)(0)+=result_10; \
ref(F)(1)(1)+=result_11; \
ref(F)(1)(2)+=result_12; \
ref(F)(2)(0)+=result_20; \
ref(F)(2)(1)+=result_21; \
ref(F)(2)(2)+=result_22; \
ref(F)(3)(0)+=result_30; \
ref(F)(3)(1)+=result_31; \
ref(F)(3)(2)+=result_32; \
}
@ -463,15 +481,18 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
int offset,local,perm, ptype;
StencilEntry *SE;
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON);
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss);
#define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
@ -485,16 +506,19 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
StencilEntry *SE;
int offset,local,perm, ptype;
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON);
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss);
#define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
@ -509,16 +533,20 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
int offset,local,perm, ptype;
StencilEntry *SE;
ZERO_RESULT;
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT(ss);
#define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
@ -532,16 +560,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
StencilEntry *SE;
int offset,local,perm, ptype;
ZERO_RESULT;
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT(ss);
#define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT(ss,F)
HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl> void
@ -557,16 +589,20 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
int offset,local,perm, ptype;
StencilEntry *SE;
int nmu=0;
ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
HAND_RESULT_EXT(ss);
#define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Yp,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zp,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tp,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xm,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Ym,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
template<class Impl>
@ -581,16 +617,20 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
ZERO_RESULT;
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
HAND_RESULT_EXT(ss);
#define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \
ZERO_RESULT; \
HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zp,ZP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tp,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(XM_PROJ,3,Xm,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(YM_PROJ,2,Ym,YM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \
HAND_RESULT_EXT(ss,F)
HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN);
}
////////////////////////////////////////////////
@ -647,10 +687,130 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
FermionField &out){ assert(0); } \
HAND_SPECIALISE_EMPTY(GparityWilsonImplF);
HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
//HAND_SPECIALISE_EMPTY(GparityWilsonImplD);
HAND_SPECIALISE_EMPTY(GparityWilsonImplFH);
HAND_SPECIALISE_EMPTY(GparityWilsonImplDF);
template<> void
WilsonKernels<GparityWilsonImplD>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef GparityWilsonImplD Impl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
HAND_DOP_SITE(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
}
template<>
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef GparityWilsonImplD Impl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
HAND_DOP_SITE_DAG(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
}
template<> void
WilsonKernels<GparityWilsonImplD>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef GparityWilsonImplD Impl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
HAND_DOP_SITE_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
}
template<>
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef GparityWilsonImplD Impl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
HAND_DOP_SITE_DAG_INT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
}
template<> void
WilsonKernels<GparityWilsonImplD>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef GparityWilsonImplD Impl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
int offset,local,perm, ptype;
StencilEntry *SE;
int nmu=0;
HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
nmu = 0;
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
}
template<>
void WilsonKernels<GparityWilsonImplD>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
typedef GparityWilsonImplD Impl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
HAND_DECLARATIONS(ignore);
StencilEntry *SE;
int offset,local,perm, ptype;
int nmu=0;
HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
nmu = 0;
HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY);
}
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \

View File

@ -33,22 +33,68 @@ using namespace std;
using namespace Grid;
using namespace Grid::QCD;
typedef typename GparityDomainWallFermionR::FermionField FermionField;
//typedef GparityDomainWallFermionD GparityDiracOp;
//typedef DomainWallFermionD StandardDiracOp;
//#define DOP_PARAMS
typedef GparityMobiusFermionD GparityDiracOp;
typedef MobiusFermionD StandardDiracOp;
#define DOP_PARAMS ,1.5, 0.5
typedef typename GparityDiracOp::FermionField GparityFermionField;
typedef typename GparityDiracOp::GaugeField GparityGaugeField;
typedef typename GparityFermionField::vector_type vComplexType;
typedef typename StandardDiracOp::FermionField StandardFermionField;
typedef typename StandardDiracOp::GaugeField StandardGaugeField;
enum{ same_vComplex = std::is_same<vComplexType, typename StandardFermionField::vector_type>::value };
static_assert(same_vComplex == 1, "Dirac Operators must have same underlying SIMD complex type");
int main (int argc, char ** argv)
{
const int nu = 3;
int nu = 0;
Grid_init(&argc,&argv);
for(int i=1;i<argc;i++){
if(std::string(argv[i]) == "--Gparity-dir"){
std::stringstream ss; ss << argv[i+1]; ss >> nu;
std::cout << GridLogMessage << "Set Gparity direction to " << nu << std::endl;
}
}
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Testing Gparity Dirac operator "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexType::Nsimd()<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using UNROLLED Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
const int Ls=4;
const int L =4;
std::vector<int> latt_2f(Nd,L);
std::vector<int> latt_1f(Nd,L); latt_1f[nu] = 2*L;
//const int L =4;
//std::vector<int> latt_2f(Nd,L);
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> latt_2f = GridDefaultLatt();
std::vector<int> latt_1f(latt_2f); latt_1f[nu] = 2*latt_2f[nu];
int L = latt_2f[nu];
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplexType::Nsimd());
std::cout << GridLogMessage << "SIMD layout: ";
for(int i=0;i<simd_layout.size();i++) std::cout << simd_layout[i] << " ";
std::cout << std::endl;
std::vector<int> mpi_layout = GridDefaultMpi(); //node layout
GridCartesian * UGrid_1f = SpaceTimeGrid::makeFourDimGrid(latt_1f, simd_layout, mpi_layout);
@ -67,13 +113,13 @@ int main (int argc, char ** argv)
GridParallelRNG RNG5_2f(FGrid_2f); RNG5_2f.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu_2f(UGrid_2f);
GparityGaugeField Umu_2f(UGrid_2f);
SU3::HotConfiguration(RNG4_2f,Umu_2f);
LatticeFermion src (FGrid_2f);
LatticeFermion tmpsrc(FGrid_2f);
FermionField src_2f(FGrid_2f);
LatticeFermion src_1f(FGrid_1f);
StandardFermionField src (FGrid_2f);
StandardFermionField tmpsrc(FGrid_2f);
GparityFermionField src_2f(FGrid_2f);
StandardFermionField src_1f(FGrid_1f);
// Replicate fermion source
random(RNG5_2f,src);
@ -81,8 +127,8 @@ int main (int argc, char ** argv)
tmpsrc=src*2.0;
PokeIndex<0>(src_2f,tmpsrc,1);
LatticeFermion result_1f(FGrid_1f); result_1f=zero;
LatticeGaugeField Umu_1f(UGrid_1f);
StandardFermionField result_1f(FGrid_1f); result_1f=zero;
StandardGaugeField Umu_1f(UGrid_1f);
Replicate(Umu_2f,Umu_1f);
//Coordinate grid for reference
@ -92,7 +138,7 @@ int main (int argc, char ** argv)
//Copy-conjugate the gauge field
//First C-shift the lattice by Lx/2
{
LatticeGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) );
StandardGaugeField Umu_shift = conjugate( Cshift(Umu_1f,nu,L) );
Umu_1f = where( xcoor_1f >= Integer(L), Umu_shift, Umu_1f );
// hack test to check the same
@ -101,7 +147,7 @@ int main (int argc, char ** argv)
cout << GridLogMessage << "Umu diff " << norm2(Umu_shift)<<std::endl;
//Make the gauge field antiperiodic in nu-direction
LatticeColourMatrix Unu(UGrid_1f);
decltype(PeekIndex<LorentzIndex>(Umu_1f,nu)) Unu(UGrid_1f);
Unu = PeekIndex<LorentzIndex>(Umu_1f,nu);
Unu = where(xcoor_1f == Integer(2*L-1), -Unu, Unu);
PokeIndex<LorentzIndex>(Umu_1f,Unu,nu);
@ -115,33 +161,33 @@ int main (int argc, char ** argv)
RealD mass=0.0;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5);
StandardDiracOp Ddwf(Umu_1f,*FGrid_1f,*FrbGrid_1f,*UGrid_1f,*UrbGrid_1f,mass,M5 DOP_PARAMS);
LatticeFermion src_o_1f(FrbGrid_1f);
LatticeFermion result_o_1f(FrbGrid_1f);
StandardFermionField src_o_1f(FrbGrid_1f);
StandardFermionField result_o_1f(FrbGrid_1f);
pickCheckerboard(Odd,src_o_1f,src_1f);
result_o_1f=zero;
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
SchurDiagMooeeOperator<StandardDiracOp,StandardFermionField> HermOpEO(Ddwf);
ConjugateGradient<StandardFermionField> CG(1.0e-8,10000);
CG(HermOpEO,src_o_1f,result_o_1f);
// const int nu = 3;
std::vector<int> twists(Nd,0);
twists[nu] = 1;
GparityDomainWallFermionR::ImplParams params;
GparityDiracOp::ImplParams params;
params.twists = twists;
GparityDomainWallFermionR GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5,params);
GparityDiracOp GPDdwf(Umu_2f,*FGrid_2f,*FrbGrid_2f,*UGrid_2f,*UrbGrid_2f,mass,M5 DOP_PARAMS,params);
for(int disp=-1;disp<=1;disp+=2)
for(int mu=0;mu<5;mu++)
{
FermionField Dsrc_2f(FGrid_2f);
GparityFermionField Dsrc_2f(FGrid_2f);
LatticeFermion Dsrc_1f(FGrid_1f);
LatticeFermion Dsrc_2freplica(FGrid_1f);
LatticeFermion Dsrc_2freplica0(FGrid_1f);
LatticeFermion Dsrc_2freplica1(FGrid_1f);
StandardFermionField Dsrc_1f(FGrid_1f);
StandardFermionField Dsrc_2freplica(FGrid_1f);
StandardFermionField Dsrc_2freplica0(FGrid_1f);
StandardFermionField Dsrc_2freplica1(FGrid_1f);
if ( mu ==0 ) {
std::cout << GridLogMessage<< " Cross checking entire hopping term"<<std::endl;
@ -156,8 +202,8 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "S norms "<< norm2(src_2f) << " " << norm2(src_1f) <<std::endl;
std::cout << GridLogMessage << "D norms "<< norm2(Dsrc_2f)<< " " << norm2(Dsrc_1f) <<std::endl;
LatticeFermion Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0);
LatticeFermion Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1);
StandardFermionField Dsrc_2f0(FGrid_2f); Dsrc_2f0 = PeekIndex<0>(Dsrc_2f,0);
StandardFermionField Dsrc_2f1(FGrid_2f); Dsrc_2f1 = PeekIndex<0>(Dsrc_2f,1);
// Dsrc_2f1 = Dsrc_2f1 - Dsrc_2f0;
// std::cout << GridLogMessage << " Cross check two halves " <<norm2(Dsrc_2f1)<<std::endl;
@ -174,20 +220,20 @@ int main (int argc, char ** argv)
}
{
FermionField chi (FGrid_2f); gaussian(RNG5_2f,chi);
FermionField phi (FGrid_2f); gaussian(RNG5_2f,phi);
GparityFermionField chi (FGrid_2f); gaussian(RNG5_2f,chi);
GparityFermionField phi (FGrid_2f); gaussian(RNG5_2f,phi);
FermionField chi_e (FrbGrid_2f);
FermionField chi_o (FrbGrid_2f);
GparityFermionField chi_e (FrbGrid_2f);
GparityFermionField chi_o (FrbGrid_2f);
FermionField dchi_e (FrbGrid_2f);
FermionField dchi_o (FrbGrid_2f);
GparityFermionField dchi_e (FrbGrid_2f);
GparityFermionField dchi_o (FrbGrid_2f);
FermionField phi_e (FrbGrid_2f);
FermionField phi_o (FrbGrid_2f);
GparityFermionField phi_e (FrbGrid_2f);
GparityFermionField phi_o (FrbGrid_2f);
FermionField dphi_e (FrbGrid_2f);
FermionField dphi_o (FrbGrid_2f);
GparityFermionField dphi_e (FrbGrid_2f);
GparityFermionField dphi_o (FrbGrid_2f);
pickCheckerboard(Even,chi_e,chi);
pickCheckerboard(Odd ,chi_o,chi);
@ -212,14 +258,14 @@ int main (int argc, char ** argv)
}
FermionField result_2f(FGrid_2f); result_2f=zero;
FermionField src_o_2f(FrbGrid_2f);
FermionField result_o_2f(FrbGrid_2f);
GparityFermionField result_2f(FGrid_2f); result_2f=zero;
GparityFermionField src_o_2f(FrbGrid_2f);
GparityFermionField result_o_2f(FrbGrid_2f);
pickCheckerboard(Odd,src_o_2f,src_2f);
result_o_2f=zero;
ConjugateGradient<FermionField> CG2f(1.0e-8,10000);
SchurDiagMooeeOperator<GparityDomainWallFermionR,FermionField> HermOpEO2f(GPDdwf);
ConjugateGradient<GparityFermionField> CG2f(1.0e-8,10000);
SchurDiagMooeeOperator<GparityDiracOp,GparityFermionField> HermOpEO2f(GPDdwf);
CG2f(HermOpEO2f,src_o_2f,result_o_2f);
std::cout << "2f cb "<<result_o_2f.checkerboard<<std::endl;
@ -227,10 +273,10 @@ int main (int argc, char ** argv)
std::cout << " result norms " <<norm2(result_o_2f)<<" " <<norm2(result_o_1f)<<std::endl;
LatticeFermion res0o (FrbGrid_2f);
LatticeFermion res1o (FrbGrid_2f);
LatticeFermion res0 (FGrid_2f);
LatticeFermion res1 (FGrid_2f);
StandardFermionField res0o (FrbGrid_2f);
StandardFermionField res1o (FrbGrid_2f);
StandardFermionField res0 (FGrid_2f);
StandardFermionField res1 (FGrid_2f);
res0=zero;
res1=zero;
@ -244,9 +290,9 @@ int main (int argc, char ** argv)
setCheckerboard(res0,res0o);
setCheckerboard(res1,res1o);
LatticeFermion replica (FGrid_1f);
LatticeFermion replica0(FGrid_1f);
LatticeFermion replica1(FGrid_1f);
StandardFermionField replica (FGrid_1f);
StandardFermionField replica0(FGrid_1f);
StandardFermionField replica1(FGrid_1f);
Replicate(res0,replica0);
Replicate(res1,replica1);