mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Faster DhopDirAll for little Dirac operator coarsening
This commit is contained in:
parent
8016a465ae
commit
e5d1c09665
@ -342,6 +342,38 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
|
|||||||
coalescedWrite(out[sF], result,lane);
|
coalescedWrite(out[sF], result,lane);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||||
|
int Nsite, const FermionField &in, std::vector<FermionField> &out)
|
||||||
|
{
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto st_v = st.View();
|
||||||
|
|
||||||
|
auto out_Xm = out[0].View();
|
||||||
|
auto out_Ym = out[1].View();
|
||||||
|
auto out_Zm = out[2].View();
|
||||||
|
auto out_Tm = out[3].View();
|
||||||
|
auto out_Xp = out[4].View();
|
||||||
|
auto out_Yp = out[5].View();
|
||||||
|
auto out_Zp = out[6].View();
|
||||||
|
auto out_Tp = out[7].View();
|
||||||
|
|
||||||
|
accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{
|
||||||
|
int sU=sss/Ls;
|
||||||
|
int sF =sss;
|
||||||
|
DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0);
|
||||||
|
DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1);
|
||||||
|
DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2);
|
||||||
|
DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3);
|
||||||
|
DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4);
|
||||||
|
DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5);
|
||||||
|
DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6);
|
||||||
|
DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||||
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
|
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
|
||||||
@ -354,7 +386,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
auto st_v = st.View();
|
auto st_v = st.View();
|
||||||
#define LoopBody(Dir) \
|
#define LoopBody(Dir) \
|
||||||
if (gamma==Dir) { \
|
case Dir : \
|
||||||
accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \
|
accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \
|
||||||
for(int s=0;s<Ls;s++){ \
|
for(int s=0;s<Ls;s++){ \
|
||||||
int sU=ss; \
|
int sU=ss; \
|
||||||
@ -362,8 +394,9 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\
|
DhopDir##Dir(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp);\
|
||||||
} \
|
} \
|
||||||
}); \
|
}); \
|
||||||
}
|
break;
|
||||||
|
|
||||||
|
switch(gamma){
|
||||||
LoopBody(Xp);
|
LoopBody(Xp);
|
||||||
LoopBody(Yp);
|
LoopBody(Yp);
|
||||||
LoopBody(Zp);
|
LoopBody(Zp);
|
||||||
@ -373,7 +406,10 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
LoopBody(Ym);
|
LoopBody(Ym);
|
||||||
LoopBody(Zm);
|
LoopBody(Zm);
|
||||||
LoopBody(Tm);
|
LoopBody(Tm);
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
#undef LoopBody
|
#undef LoopBody
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user