1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Staggered coalesced read

This commit is contained in:
Peter Boyle 2021-03-29 20:01:15 +02:00
parent 8bdadbadac
commit bb89a82a07
3 changed files with 82 additions and 45 deletions

View File

@ -72,19 +72,23 @@ public:
StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){};
static accelerator_inline void multLink(SiteSpinor &phi, template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
const SiteSpinor &chi, const _Spinor &chi,
int mu) int mu)
{ {
mult(&phi(), &U(mu), &chi()); auto UU = coalescedRead(U(mu));
mult(&phi(), &UU, &chi());
} }
static accelerator_inline void multLinkAdd(SiteSpinor &phi, template<class _Spinor>
static accelerator_inline void multLinkAdd(_Spinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
const SiteSpinor &chi, const _Spinor &chi,
int mu) int mu)
{ {
mac(&phi(), &U(mu), &chi()); auto UU = coalescedRead(U(mu));
mac(&phi(), &UU, &chi());
} }
template <class ref> template <class ref>

View File

@ -184,18 +184,22 @@ public:
mat = TraceIndex<SpinIndex>(P); mat = TraceIndex<SpinIndex>(P);
} }
inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds){ inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
{
for (int mu = 0; mu < Nd; mu++) for (int mu = 0; mu < Nd; mu++)
mat[mu] = PeekIndex<LorentzIndex>(Uds, mu); mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu)
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ {
#undef USE_OLD_INSERT_FORCE
int Ls=Btilde.Grid()->_fdimensions[0]; int Ls=Btilde.Grid()->_fdimensions[0];
autoView( mat_v , mat, AcceleratorWrite);
#ifdef USE_OLD_INSERT_FORCE
GaugeLinkField tmp(mat.Grid()); GaugeLinkField tmp(mat.Grid());
tmp = Zero(); tmp = Zero();
{ {
const int Nsimd = SiteSpinor::Nsimd();
autoView( tmp_v , tmp, AcceleratorWrite); autoView( tmp_v , tmp, AcceleratorWrite);
autoView( Btilde_v , Btilde, AcceleratorRead); autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead); autoView( Atilde_v , Atilde, AcceleratorRead);
@ -208,6 +212,29 @@ public:
}); });
} }
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
#else
{
const int Nsimd = SiteSpinor::Nsimd();
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;
zeroit(sum);
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
for(int spn=0;spn<Ns;spn++){ //sum over spin
auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
auto aa = coalescedRead(Atilde_v[sF]()(spn) );
auto op = outerProduct(bb,aa);
sum = sum + op;
}
}
coalescedWrite(mat_v[sU](mu)(), sum);
});
}
#endif
} }
}; };

View File

@ -35,39 +35,32 @@ NAMESPACE_BEGIN(Grid);
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \ #define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \ SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \ if (SE->_is_local ) { \
if (SE->_permute) { \ int perm= SE->_permute; \
chi_p = &chi; \ chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in[SE->_offset]; \
} \
} else { \ } else { \
chi_p = &buf[SE->_offset]; \ chi = coalescedRead(buf[SE->_offset],lane); \
} \ } \
multLink(Uchi, U[sU], *chi_p, Dir); acceleratorSynchronise(); \
multLink(Uchi, U[sU], chi, Dir);
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \ #define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \ SE = st.GetEntry(ptype, Dir+skew, sF); \
if (SE->_is_local ) { \ if (SE->_is_local ) { \
if (SE->_permute) { \ int perm= SE->_permute; \
chi_p = &chi; \ chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);\
permute(chi, in[SE->_offset], ptype); \
} else { \
chi_p = &in[SE->_offset]; \
} \
} else if ( st.same_node[Dir] ) { \ } else if ( st.same_node[Dir] ) { \
chi_p = &buf[SE->_offset]; \ chi = coalescedRead(buf[SE->_offset],lane); \
} \ } \
if (SE->_is_local || st.same_node[Dir] ) { \ if (SE->_is_local || st.same_node[Dir] ) { \
multLink(Uchi, U[sU], *chi_p, Dir); \ multLink(Uchi, U[sU], chi, Dir); \
} }
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \ #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
SE = st.GetEntry(ptype, Dir+skew, sF); \ SE = st.GetEntry(ptype, Dir+skew, sF); \
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
nmu++; \ nmu++; \
chi_p = &buf[SE->_offset]; \ chi = coalescedRead(buf[SE->_offset],lane); \
multLink(Uchi, U[sU], *chi_p, Dir); \ multLink(Uchi, U[sU], chi, Dir); \
} }
template <class Impl> template <class Impl>
@ -84,12 +77,14 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
const FermionFieldView &in, FermionFieldView &out, int dag) const FermionFieldView &in, FermionFieldView &out, int dag)
{ {
const SiteSpinor *chi_p; typedef decltype(coalescedRead(in[0])) calcSpinor;
SiteSpinor chi; calcSpinor chi;
SiteSpinor Uchi; calcSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
int skew; int skew;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
// for(int s=0;s<LLs;s++){ // for(int s=0;s<LLs;s++){
// //
@ -118,7 +113,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
if ( dag ) { if ( dag ) {
Uchi = - Uchi; Uchi = - Uchi;
} }
vstream(out[sF], Uchi); coalescedWrite(out[sF], Uchi,lane);
} }
}; };
@ -130,13 +125,16 @@ template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag) { const FermionFieldView &in, FermionFieldView &out,int dag)
const SiteSpinor *chi_p; {
SiteSpinor chi; typedef decltype(coalescedRead(in[0])) calcSpinor;
SiteSpinor Uchi; calcSpinor chi;
calcSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
int skew ; int skew ;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
// for(int s=0;s<LLs;s++){ // for(int s=0;s<LLs;s++){
// int sF=LLs*sU+s; // int sF=LLs*sU+s;
@ -165,7 +163,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
if ( dag ) { if ( dag ) {
Uchi = - Uchi; Uchi = - Uchi;
} }
vstream(out[sF], Uchi); coalescedWrite(out[sF], Uchi,lane);
} }
}; };
@ -178,14 +176,17 @@ template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag) { const FermionFieldView &in, FermionFieldView &out,int dag)
const SiteSpinor *chi_p; {
// SiteSpinor chi; typedef decltype(coalescedRead(in[0])) calcSpinor;
SiteSpinor Uchi; calcSpinor chi;
calcSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
int nmu=0; int nmu=0;
int skew ; int skew ;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
// for(int s=0;s<LLs;s++){ // for(int s=0;s<LLs;s++){
// int sF=LLs*sU+s; // int sF=LLs*sU+s;
@ -211,11 +212,12 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
} }
if ( nmu ) { if ( nmu ) {
if ( dag ) { auto _out = coalescedRead(out[sF],lane);
out[sF] = out[sF] - Uchi; if ( dag ) {
coalescedWrite(out[sF], _out-Uchi,lane);
} else { } else {
out[sF] = out[sF] + Uchi; coalescedWrite(out[sF], _out+Uchi,lane);
} }
} }
} }
@ -261,6 +263,8 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
GridBase *FGrid=in.Grid(); GridBase *FGrid=in.Grid();
GridBase *UGrid=U.Grid(); GridBase *UGrid=U.Grid();
typedef StaggeredKernels<Impl> ThisKernel; typedef StaggeredKernels<Impl> ThisKernel;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
autoView( UUU_v , UUU, AcceleratorRead); autoView( UUU_v , UUU, AcceleratorRead);
autoView( U_v , U, AcceleratorRead); autoView( U_v , U, AcceleratorRead);
autoView( in_v , in, AcceleratorRead); autoView( in_v , in, AcceleratorRead);
@ -301,6 +305,8 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
GridBase *FGrid=in.Grid(); GridBase *FGrid=in.Grid();
GridBase *UGrid=U.Grid(); GridBase *UGrid=U.Grid();
typedef StaggeredKernels<Impl> ThisKernel; typedef StaggeredKernels<Impl> ThisKernel;
const int Nsimd = SiteHalfSpinor::Nsimd();
const int lane=acceleratorSIMTlane(Nsimd);
autoView( UUU_v , U, AcceleratorRead); autoView( UUU_v , U, AcceleratorRead);
autoView( U_v , U, AcceleratorRead); autoView( U_v , U, AcceleratorRead);
autoView( in_v , in, AcceleratorRead); autoView( in_v , in, AcceleratorRead);