1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Simplify Impl

This commit is contained in:
Peter Boyle 2019-06-09 22:26:27 +01:00
parent d6c0e0756d
commit 36f06555a2
6 changed files with 33 additions and 44 deletions

View File

@ -102,19 +102,6 @@ public:
#endif
}
static accelerator_inline void multLinkProp(SitePropagator &phi,
const SiteDoubledGaugeField &U,
const SitePropagator &chi,int mu)
{
SiteGaugeLink UU;
for (int i = 0; i < Dimension; i++) {
for (int j = 0; j < Dimension; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j));
}
}
mult(&phi(), &UU(), &chi());
}
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
{
SiteScalarGaugeField ScalarUmu;

View File

@ -73,6 +73,14 @@ public:
// provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu)
{
assert(0);
}
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
@ -182,14 +190,6 @@ public:
}
#endif
}
// Fixme: Gparity prop * link
static accelerator_inline void multLinkProp(SitePropagator &phi,
const SiteDoubledGaugeField &U,
const SitePropagator &chi,
int mu)
{
assert(0);
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)

View File

@ -83,22 +83,23 @@ public:
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
int mu)
{
auto UU = coalescedRead(U(mu));
mult(&phi(), &UU, &chi());
}
static accelerator_inline void multLinkProp(SitePropagator &phi,
template<class _Spinor>
static accelerator_inline void multLink(_Spinor &phi,
const SiteDoubledGaugeField &U,
const SitePropagator &chi,
int mu)
const _Spinor &chi,
int mu,
StencilEntry *SE,
StencilView &St)
{
mult(&phi(), &U(mu), &chi());
multLink(phi,U,chi,mu);
}
template <class ref>
static accelerator_inline void loadLinkElement(Simd &reg, ref &memory)
{

View File

@ -364,19 +364,19 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSite); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
#endif
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteInt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
#endif
} else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteExt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
#endif
}
assert(0 && " Kernel optimisation case not covered ");
@ -440,7 +440,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(const SitePropagator &
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
Impl::multLink(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
@ -469,7 +469,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(const SitePropagator &
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
Impl::multLink(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign) {
@ -496,7 +496,7 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
{
SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu);
Impl::multLink(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
@ -525,7 +525,7 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu + Nd);
Impl::multLink(result, U[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.

View File

@ -66,6 +66,7 @@ void WilsonKernels<IMPLEMENTATION>::ContractConservedCurrentSiteBwd( const SiteP
HAND_SPECIALISE_GPARITY(IMPLEMENTATION);
template class WilsonKernels<IMPLEMENTATION>;

14
TODO
View File

@ -6,17 +6,20 @@ GPU branch code item work list
* 0) Single GPU
- 128 bit integer table load in GPU code.
- coalescedRead <- threadIdx.x
- Gianluca's changes to Cayley into gpu-port
- GPU accelerate EOFA
- Clean up PRAGMAS, and SIMT_loop
thread_loop interface revisit.
for_n
for
- Staggered kernels -> GPU coalesced loop
- Staggered kernels inline for GPU -- DONE
* 2) 5D terms
* 2) 5D terms & Gianluca
- Cayley coefficients -> GPU retention or prefetch
- Gianluca's changes to Cayley into gpu-port
- GPU accelerate EOFA
- Mobius kernel fusion. -- Gianluca?
- Make GPU offload reductions optionally deterministic -- Gianluca
* 3) Comms/NVlink
- OpenMP tasks to run comms threads.
@ -30,12 +33,9 @@ GPU branch code item work list
* 5) Misc
- SIMD dirs in stencil
- Conserved current clean up.
- multLinkProp eliminate
- Staggered kernels -> GPU coalesced loop
- Staggered kernels inline for GPU -- DONE
- Make GPU offload reductions optionally deterministic -- Gianluca
- SIMD dirs in stencil
7) Accelerate the cshift