mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Instantiations move. Tried using Gianluca's suggestion about avoiding threadIdx but doesn't
seem to make a difference. Will revisit this and probably remove the lane parameter from the coalescedRead
This commit is contained in:
parent
86e7fb6e86
commit
ad2c433574
@ -43,11 +43,12 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
SE = st.GetEntry(ptype, Dir, sF); \
|
SE = st.GetEntry(ptype, Dir, sF); \
|
||||||
if (SE->_is_local) { \
|
if (SE->_is_local) { \
|
||||||
int perm= SE->_permute; \
|
int perm= SE->_permute; \
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm); \
|
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||||
spProj(chi,tmp); \
|
spProj(chi,tmp); \
|
||||||
} else { \
|
} else { \
|
||||||
chi = coalescedRead(buf[SE->_offset]); \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
|
synchronise(); \
|
||||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||||
Recon(result, Uchi);
|
Recon(result, Uchi);
|
||||||
|
|
||||||
@ -55,36 +56,41 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
SE = st.GetEntry(ptype, Dir, sF); \
|
SE = st.GetEntry(ptype, Dir, sF); \
|
||||||
if (SE->_is_local) { \
|
if (SE->_is_local) { \
|
||||||
int perm= SE->_permute; \
|
int perm= SE->_permute; \
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm); \
|
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||||
spProj(chi,tmp); \
|
spProj(chi,tmp); \
|
||||||
} else if ( st.same_node[Dir] ) { \
|
} else if ( st.same_node[Dir] ) { \
|
||||||
chi = coalescedRead(buf[SE->_offset]); \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
|
synchronise(); \
|
||||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
}
|
} \
|
||||||
|
synchronise();
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
||||||
SE = st.GetEntry(ptype, Dir, sF); \
|
SE = st.GetEntry(ptype, Dir, sF); \
|
||||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||||
auto chi = coalescedRead(buf[SE->_offset]); \
|
auto chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
nmu++; \
|
nmu++; \
|
||||||
}
|
} \
|
||||||
|
synchronise();
|
||||||
|
|
||||||
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \
|
||||||
if (gamma == Dir) { \
|
if (gamma == Dir) { \
|
||||||
if (SE->_is_local ) { \
|
if (SE->_is_local ) { \
|
||||||
int perm= SE->_permute; \
|
int perm= SE->_permute; \
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm); \
|
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||||
spProj(chi,tmp); \
|
spProj(chi,tmp); \
|
||||||
} else { \
|
} else { \
|
||||||
chi = coalescedRead(buf[SE->_offset]); \
|
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
} \
|
} \
|
||||||
|
synchronise(); \
|
||||||
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
|
synchronise(); \
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
@ -103,7 +109,8 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV
|
|||||||
calcSpinor result;
|
calcSpinor result;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
|
GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp);
|
||||||
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
|
GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp);
|
||||||
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
|
GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp);
|
||||||
@ -112,7 +119,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV
|
|||||||
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
|
||||||
coalescedWrite(out[sF],result);
|
coalescedWrite(out[sF],result,lane);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -129,6 +136,8 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
|
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
|
GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp);
|
||||||
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
|
GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp);
|
||||||
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
|
GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp);
|
||||||
@ -137,7 +146,7 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView
|
|||||||
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
|
||||||
coalescedWrite(out[sF], result);
|
coalescedWrite(out[sF], result,lane);
|
||||||
};
|
};
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// Interior kernels
|
// Interior kernels
|
||||||
@ -155,6 +164,8 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi
|
|||||||
calcSpinor result;
|
calcSpinor result;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
|
|
||||||
result=Zero();
|
result=Zero();
|
||||||
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
|
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
|
||||||
@ -165,7 +176,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi
|
|||||||
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
|
||||||
coalescedWrite(out[sF], result);
|
coalescedWrite(out[sF], result,lane);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -175,6 +186,8 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeField
|
|||||||
{
|
{
|
||||||
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
|
typedef decltype(coalescedRead(buf[0])) calcHalfSpinor;
|
||||||
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
typedef decltype(coalescedRead(in[0])) calcSpinor;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
|
|
||||||
calcHalfSpinor chi;
|
calcHalfSpinor chi;
|
||||||
// calcHalfSpinor *chi_p;
|
// calcHalfSpinor *chi_p;
|
||||||
@ -191,7 +204,7 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeField
|
|||||||
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
|
GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm);
|
||||||
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
|
||||||
coalescedWrite(out[sF], result);
|
coalescedWrite(out[sF], result,lane);
|
||||||
};
|
};
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// Exterior kernels
|
// Exterior kernels
|
||||||
@ -209,6 +222,8 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
result=Zero();
|
result=Zero();
|
||||||
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
|
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
|
||||||
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
|
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
|
||||||
@ -219,9 +234,9 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi
|
|||||||
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm);
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
auto out_t = coalescedRead(out[sF]);
|
auto out_t = coalescedRead(out[sF],lane);
|
||||||
out_t = out_t + result;
|
out_t = out_t + result;
|
||||||
coalescedWrite(out[sF],out_t);
|
coalescedWrite(out[sF],out_t,lane);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -238,6 +253,8 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
|
|||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
result=Zero();
|
result=Zero();
|
||||||
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
|
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
|
||||||
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
|
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
|
||||||
@ -248,9 +265,9 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
|
|||||||
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
|
GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm);
|
||||||
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
|
GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm);
|
||||||
if ( nmu ) {
|
if ( nmu ) {
|
||||||
auto out_t = coalescedRead(out[sF]);
|
auto out_t = coalescedRead(out[sF],lane);
|
||||||
out_t = out_t + result;
|
out_t = out_t + result;
|
||||||
coalescedWrite(out[sF],out_t);
|
coalescedWrite(out[sF],out_t,lane);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -265,15 +282,17 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
|
|||||||
calcHalfSpinor Uchi;
|
calcHalfSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
|
const int Nsimd = SiteHalfSpinor::Nsimd();
|
||||||
|
const int lane=SIMTlane(Nsimd);
|
||||||
|
|
||||||
SE = st.GetEntry(ptype, dir, sF);
|
SE = st.GetEntry(ptype, dir, sF);
|
||||||
if (gamma == Xp) {
|
if (gamma == Xp) {
|
||||||
if (SE->_is_local ) {
|
if (SE->_is_local ) {
|
||||||
int perm= SE->_permute;
|
int perm= SE->_permute;
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm);
|
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);
|
||||||
spProjXp(chi,tmp);
|
spProjXp(chi,tmp);
|
||||||
} else {
|
} else {
|
||||||
chi = coalescedRead(buf[SE->_offset]);
|
chi = coalescedRead(buf[SE->_offset],lane);
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
|
Impl::multLink(Uchi, U[sU], chi, dir, SE, st);
|
||||||
spReconXp(result, Uchi);
|
spReconXp(result, Uchi);
|
||||||
@ -286,7 +305,7 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
|
|||||||
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
|
GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm);
|
||||||
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
|
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
|
||||||
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
||||||
coalescedWrite(out[sF], result);
|
coalescedWrite(out[sF], result,lane);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
@ -355,37 +374,6 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(const SitePropagator &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// G-parity requires more specialised implementation.
|
|
||||||
#define NO_CURR_SITE(Impl) \
|
|
||||||
template <> \
|
|
||||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, \
|
|
||||||
const SitePropagator &q_in_2, \
|
|
||||||
SitePropagator &q_out, \
|
|
||||||
DoubledGaugeFieldView &U, \
|
|
||||||
unsigned int sU, \
|
|
||||||
unsigned int mu, \
|
|
||||||
bool switch_sign) \
|
|
||||||
{ \
|
|
||||||
assert(0); \
|
|
||||||
} \
|
|
||||||
template <> \
|
|
||||||
void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, \
|
|
||||||
const SitePropagator &q_in_2, \
|
|
||||||
SitePropagator &q_out, \
|
|
||||||
DoubledGaugeFieldView &U, \
|
|
||||||
unsigned int mu, \
|
|
||||||
unsigned int sU, \
|
|
||||||
bool switch_sign) \
|
|
||||||
{ \
|
|
||||||
assert(0); \
|
|
||||||
}
|
|
||||||
|
|
||||||
NO_CURR_SITE(GparityWilsonImplF);
|
|
||||||
NO_CURR_SITE(GparityWilsonImplD);
|
|
||||||
NO_CURR_SITE(GparityWilsonImplFH);
|
|
||||||
NO_CURR_SITE(GparityWilsonImplDF);
|
|
||||||
|
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
* Name: SeqConservedCurrentSiteFwd
|
* Name: SeqConservedCurrentSiteFwd
|
||||||
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
|
* Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user