1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Generic or GPU ready for benchmark test on GPU

This commit is contained in:
Peter Boyle 2019-06-05 00:13:52 +01:00
parent ba4fd756b9
commit 6379651cdd

View File

@ -88,6 +88,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
#define GPU_COALESCED_STENCIL_LEG_PROJ(Dir,spProj) \ #define GPU_COALESCED_STENCIL_LEG_PROJ(Dir,spProj) \
if (SE._is_local) { \ if (SE._is_local) { \
auto in_t = in[SE._offset+s]; \ auto in_t = in[SE._offset+s]; \
decltype(chi) tmp; \
if (SE._permute) { \ if (SE._permute) { \
spProj(tmp, in_t); \ spProj(tmp, in_t); \
permute(chi, tmp, ptype); \ permute(chi, tmp, ptype); \
@ -105,9 +106,16 @@ accelerator_inline void WilsonKernels<Impl>::GpuDhopSiteDag(StencilView &st, Sit
SiteHalfSpinor *buf, int Ls, int s, SiteHalfSpinor *buf, int Ls, int s,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
#ifdef __CUDA_ARCH__
typename SiteHalfSpinor::scalar_object chi; typename SiteHalfSpinor::scalar_object chi;
typename SiteHalfSpinor::scalar_object Uchi; typename SiteHalfSpinor::scalar_object Uchi;
typename SiteSpinor::scalar_object result; typename SiteSpinor::scalar_object result;
#else
SiteHalfSpinor chi;
SiteHalfSpinor Uchi;
SiteSpinor result;
#endif
typedef typename SiteSpinor::scalar_type scalar_type; typedef typename SiteSpinor::scalar_type scalar_type;
typedef typename SiteSpinor::vector_type vector_type; typedef typename SiteSpinor::vector_type vector_type;
constexpr int Nsimd = sizeof(vector_type)/sizeof(scalar_type); constexpr int Nsimd = sizeof(vector_type)/sizeof(scalar_type);
@ -178,9 +186,15 @@ accelerator_inline void WilsonKernels<Impl>::GpuDhopSite(StencilView &st, SiteDo
SiteHalfSpinor *buf, int Ls, int s, SiteHalfSpinor *buf, int Ls, int s,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
#ifdef __CUDA_ARCH__
typename SiteHalfSpinor::scalar_object chi; typename SiteHalfSpinor::scalar_object chi;
typename SiteHalfSpinor::scalar_object Uchi; typename SiteHalfSpinor::scalar_object Uchi;
typename SiteSpinor::scalar_object result; typename SiteSpinor::scalar_object result;
#else
SiteHalfSpinor chi;
SiteHalfSpinor Uchi;
SiteSpinor result;
#endif
typedef typename SiteSpinor::scalar_type scalar_type; typedef typename SiteSpinor::scalar_type scalar_type;
typedef typename SiteSpinor::vector_type vector_type; typedef typename SiteSpinor::vector_type vector_type;
constexpr int Nsimd = sizeof(vector_type)/sizeof(scalar_type); constexpr int Nsimd = sizeof(vector_type)/sizeof(scalar_type);
@ -282,27 +296,25 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
auto in_v = in.View(); auto in_v = in.View();
auto out_v = out.View(); auto out_v = out.View();
auto st_v = st.View(); auto st_v = st.View();
if ( (Opt == WilsonKernelsStatic::OptGpu) && interior && exterior ) { if ( (Opt == WilsonKernelsStatic::OptGpu) && interior && exterior ) {
const uint64_t nsimd = Simd::Nsimd(); #define KERNEL_CALL(A) \
const uint64_t NN = Nsite*Ls*nsimd; const uint64_t nsimd = Simd::Nsimd(); \
accelerator_loopN( sss, NN, { const uint64_t NN = Nsite*Ls*nsimd;\
uint64_t cur = sss; accelerator_loopN( sss, NN, { \
// uint64_t lane = cur % nsimd; uint64_t cur = sss; \
cur = cur / nsimd; cur = cur / nsimd; \
uint64_t s = cur%Ls; uint64_t s = cur%Ls; \
// uint64_t sF = cur; cur = cur / Ls; \
cur = cur / Ls; uint64_t sU = cur;
uint64_t sU = cur;
WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v); WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);
}); });
} else { } else {
/*
accelerator_loop( ss, U_v, { accelerator_loop( ss, U_v, {
int sU = ss; int sU = ss;
int sF = Ls * sU; int sF = Ls * sU;
WilsonKernels<Impl>::DhopSite(Opt,st_v,U_v,st.CommBuf(),sF,sU,Ls,1,in_v,out_v); WilsonKernels<Impl>::GenericDhopSite(Opt,st_v,U_v,st.CommBuf(),sF,sU,Ls,1,in_v,out_v);
}); });
*/
} }
} }
template <class Impl> template <class Impl>
@ -329,11 +341,11 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v); WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);
}); });
} else { } else {
// accelerator_loop( ss, U_v, { accelerator_loop( ss, U_v, {
// int sU = ss; int sU = ss;
// int sF = Ls * sU; int sF = Ls * sU;
// WilsonKernels<Impl>::DhopSiteDag(Opt,st,U_v,st.CommBuf(),sF,sU,Ls,1,in_v,out_v); WilsonKernels<Impl>::GenericDhopSiteDag(Opt,st,U_v,st.CommBuf(),sF,sU,Ls,1,in_v,out_v);
// }); });
} }
} }