1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-08-06 14:37:11 +01:00

Finally starting to get decent performance on Volta

This commit is contained in:
Peter Boyle
2018-07-13 12:06:18 -04:00
parent 2cc07450f4
commit b2b5137d28
5 changed files with 203 additions and 98 deletions

View File

@@ -104,6 +104,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, Double
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@@ -121,6 +122,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, Double
GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm);
vstream(out[sF], result);
#endif
};
template <class Impl>
@@ -128,6 +130,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGa
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@@ -145,6 +148,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGa
GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm);
vstream(out[sF], result);
#endif
};
////////////////////////////////////////////////////////////////////
// Interior kernels
@@ -154,6 +158,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, Do
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@@ -172,6 +177,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, Do
GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm);
vstream(out[sF], result);
#endif
};
template <class Impl>
@@ -179,6 +185,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, Doubl
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@@ -196,6 +203,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, Doubl
GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm);
GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm);
vstream(out[sF], result);
#endif
};
////////////////////////////////////////////////////////////////////
// Exterior kernels
@@ -205,6 +213,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, Do
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
// SiteHalfSpinor tmp;
// SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@@ -225,6 +234,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, Do
if ( nmu ) {
out[sF] = out[sF] + result;
}
#endif
};
template <class Impl>
@@ -232,6 +242,7 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, Doubl
SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifndef GRID_NVCC
// SiteHalfSpinor tmp;
// SiteHalfSpinor chi;
SiteHalfSpinor *chi_p;
@@ -252,12 +263,14 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, Doubl
if ( nmu ) {
out[sF] = out[sF] + result;
}
#endif
};
template <class Impl>
accelerator void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
#ifndef GRID_NVCC
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteSpinor result;
@@ -275,6 +288,7 @@ accelerator void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFie
GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm);
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
vstream(out[sF], result);
#endif
}
/*******************************************************************************