From 1299225105e28c9e8a3c2e4e805365a13897202b Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Sat, 15 Jun 2019 09:03:46 +0100 Subject: [PATCH] Accelerator loop changes --- .../WilsonKernelsImplementation.h | 58 +++++++------------ 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index d99653c9..26ca7750 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -320,34 +320,20 @@ void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm); coalescedWrite(out[sF], result,lane); } - - -#define KERNEL_CALL(A) \ - const uint64_t nsimd = Simd::Nsimd(); \ - const uint64_t NN = Nsite*Ls*nsimd;\ - accelerator_loopN( sss, NN, { \ - uint64_t cur = sss; \ - cur = cur / nsimd; \ - uint64_t s = cur%Ls; \ - cur = cur / Ls; \ - uint64_t sU = cur; \ - WilsonKernels::A(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);\ - }); -#define HOST_CALL(A) \ - const uint64_t nsimd = Simd::Nsimd(); \ +#define KERNEL_CALL(A) \ const uint64_t NN = Nsite*Ls; \ - SIMT_loop( ss, NN, nsimd, { \ + accelerator_for( ss, NN, Simd::Nsimd(), { \ int sF = ss; \ int sU = ss/Ls; \ - WilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + WilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ }); -#define ASM_CALL(A) \ - SIMT_loop( ss, Nsite, { \ - int sU = ss; \ - int sF = ss*Ls; \ - WilsonKernels::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \ +#define ASM_CALL(A) \ + thread_for( ss, Nsite, { \ + int sU = ss; \ + int sF = ss*Ls; \ + WilsonKernels::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \ }); template @@ -361,21 +347,21 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField auto st_v = st.View(); if( interior && exterior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSite); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} #ifndef GRID_NVCC - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSite); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;} #endif } else if( interior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteInt); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteInt); return;} #ifndef GRID_NVCC - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteInt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;} #endif } else if( exterior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteExt); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} #ifndef GRID_NVCC - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteExt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;} #endif } @@ -392,21 +378,21 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField auto st_v = st.View(); if( interior && exterior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteDag); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;} #ifndef GRID_NVCC - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteDag); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;} #endif } else if( interior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteDagInt); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;} #ifndef GRID_NVCC - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteDagInt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;} #endif } else if( exterior ) { - if (Opt == WilsonKernelsStatic::OptGeneric ) { HOST_CALL(GenericDhopSiteDagExt); return;} + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} #ifndef GRID_NVCC - if (Opt == WilsonKernelsStatic::OptHandUnroll ) { HOST_CALL(HandDhopSiteDagExt); return;} + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;} #endif } @@ -491,7 +477,7 @@ void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, - vInteger t_mask, + vPredicate t_mask, bool switch_sign) { SitePropagator result; @@ -521,7 +507,7 @@ void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, - vInteger t_mask, + vPredicate t_mask, bool switch_sign) { SitePropagator result;