From 1282e1067f938abe9681030889f3d169c1299051 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 29 Jul 2019 22:58:35 +0100 Subject: [PATCH] Do the force term on the accelerator too. Needed particularly because comms buffers are device memory. --- Grid/qcd/action/fermion/WilsonKernels.h | 10 ++-- .../WilsonFermion5DImplementation.h | 52 ++++--------------- .../WilsonFermionImplementation.h | 25 +++------ .../WilsonKernelsImplementation.h | 21 +++++++- 4 files changed, 45 insertions(+), 63 deletions(-) diff --git a/Grid/qcd/action/fermion/WilsonKernels.h b/Grid/qcd/action/fermion/WilsonKernels.h index 17c56485..35715097 100644 --- a/Grid/qcd/action/fermion/WilsonKernels.h +++ b/Grid/qcd/action/fermion/WilsonKernels.h @@ -60,9 +60,9 @@ public: int Ls, int Nsite, const FermionField &in, FermionField &out, int interior=1,int exterior=1) ; - static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, - int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); - + static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); + ////////////////////////////////////////////////////////////////////////////// // Utilities for inserting Wilson conserved current. ////////////////////////////////////////////////////////////////////////////// @@ -99,6 +99,10 @@ public: bool switch_sign = false); private: + + static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); + // Specialised variants static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out); diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 3dd68532..1bdc9a64 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -230,38 +230,25 @@ void WilsonFermion5D::DhopDir(const FermionField &in, FermionField &out,in // assert( (disp==1)||(disp==-1) ); // assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; - Compressor compressor(DaggerNo); - Stencil.HaloExchange(in,compressor); - int skip = (disp==1) ? 0 : 1; - int dirdisp = dir+skip*4; int gamma = dir+(1-skip)*4; - assert(dirdisp<=7); - assert(dirdisp>=0); + Compressor compressor(DaggerNo); + Stencil.HaloExchange(in,compressor); + + uint64_t Nsite = Umu.Grid()->oSites(); + Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma); - auto Umu_v = Umu.View(); - auto in_v = in.View(); - auto out_v = out.View(); - thread_for(ss,Umu.Grid()->oSites(),{ - // parallel_for(int ss=0;ssoSites();ss++){ - for(int s=0;s void WilsonFermion5D::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) { DerivCalls++; assert((dag==DaggerNo) ||(dag==DaggerYes)); @@ -296,27 +283,10 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, DerivDhopComputeTime -= usecond(); - auto U_v = U.View(); - auto Btilde_v = Btilde.View(); - auto B_v = B.View(); - int Bsites = B.Grid()->oSites(); int Usites = U.Grid()->oSites(); - thread_for(sss, U.Grid()->oSites(),{ - // parallel_for (int sss = 0; sss < U.Grid()->oSites(); sss++) { - for (int s = 0; s < Ls; s++) { - int sU = sss; - int sF = s + Ls * sU; - assert(sF < Bsites); - assert(sU < Usites); + Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma); - Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma); - // Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); - //////////////////////////// - // spin trace outer product - //////////////////////////// - } - }); //////////////////////////// // spin trace outer product //////////////////////////// diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 0a0cf071..756bdbf4 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -238,16 +238,8 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, int gamma = mu; if (!dag) gamma += Nd; - //////////////////////// - // Call the single hop - //////////////////////// - auto U_v = U.View(); - auto B_v = B.View(); - auto Btilde_v = Btilde.View(); - auto st_v = st.View(); - thread_for( sss, B.Grid()->oSites(), { - Kernels::DhopDirK(st_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma); - }); + int Ls=1; + Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma); ////////////////////////////////////////////////// // spin trace outer product @@ -332,7 +324,8 @@ void WilsonFermion::Mdir(const FermionField &in, FermionField &out, int di } template -void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { +void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) +{ int skip = (disp == 1) ? 0 : 1; int dirdisp = dir + skip * 4; int gamma = dir + (1 - skip) * 4; @@ -346,13 +339,9 @@ void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out, Compressor compressor(dag); Stencil.HaloExchange(in, compressor); - auto in_v = in.View(); - auto out_v = in.View(); - auto Umu_v = Umu.View(); - auto Stencil_v = Stencil.View(); - thread_for(sss, in.Grid()->oSites(),{ - Kernels::DhopDirK(Stencil_v, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma); - }); + int Ls=1; + int Nsite=in.oSites(); + Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma); }; template diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 5eea31f5..d54ccd28 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -320,7 +320,26 @@ void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm); coalescedWrite(out[sF], result,lane); } - + +template +void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, + int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) +{ + assert(dirdisp<=7); + assert(dirdisp>=0); + + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v = out.View(); + auto st_v = st.View(); + accelerator_for(ss,Nsite,1,{ + for(int s=0;s