From 3277bda130291169da1cc176c4b95321030dffe6 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 4 Mar 2018 16:38:08 +0000 Subject: [PATCH] View introduction to prepare for accelerator offload. Probably same problem exists for stencil object --- .../action/fermion/CayleyFermion5Dcache.cc | 44 +++--- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 42 ++++-- .../fermion/DomainWallEOFAFermioncache.cc | 44 +++--- .../fermion/DomainWallEOFAFermionvec.cc | 34 +++-- lib/qcd/action/fermion/FermionOperatorImpl.h | 49 +++--- .../fermion/ImprovedStaggeredFermion.cc | 21 ++- .../fermion/ImprovedStaggeredFermion5D.cc | 17 ++- .../action/fermion/MobiusEOFAFermioncache.cc | 76 ++++++---- .../action/fermion/MobiusEOFAFermionvec.cc | 62 +++++--- lib/qcd/action/fermion/StaggeredKernels.cc | 16 +- lib/qcd/action/fermion/StaggeredKernels.h | 32 ++-- lib/qcd/action/fermion/StaggeredKernelsAsm.cc | 36 ++--- .../action/fermion/StaggeredKernelsHand.cc | 16 +- lib/qcd/action/fermion/WilsonFermion.cc | 72 +++++---- lib/qcd/action/fermion/WilsonFermion5D.cc | 90 +++++++---- lib/qcd/action/fermion/WilsonKernels.cc | 142 +++++++++--------- lib/qcd/action/fermion/WilsonKernels.h | 103 +++++++------ lib/qcd/action/fermion/WilsonKernelsAsm.cc | 48 +++--- lib/qcd/action/fermion/WilsonKernelsHand.cc | 108 ++++++------- 19 files changed, 595 insertions(+), 457 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc index 5f006921..7393b760 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc @@ -38,17 +38,20 @@ NAMESPACE_BEGIN(Grid); // Pminus fowards // Pplus backwards.. 
template -void CayleyFermion5D::M5D(const FermionField &psi, - const FermionField &phi, - FermionField &chi, +void CayleyFermion5D::M5D(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper) { + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); int Ls =this->Ls; - GridBase *grid=psi.Grid(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard()=psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol M5Dcalls++; M5Dtime-=usecond(); @@ -81,17 +84,20 @@ void CayleyFermion5D::M5D(const FermionField &psi, } template -void CayleyFermion5D::M5Ddag(const FermionField &psi, - const FermionField &phi, - FermionField &chi, +void CayleyFermion5D::M5Ddag(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper) { + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); int Ls =this->Ls; - GridBase *grid=psi.Grid(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard()=psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol M5Dcalls++; @@ -125,12 +131,14 @@ void CayleyFermion5D::M5Ddag(const FermionField &psi, } template -void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) +void CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi_i) { - GridBase *grid=psi.Grid(); - int Ls=this->Ls; + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); - chi.Checkerboard()=psi.Checkerboard(); + int Ls=this->Ls; MooeeInvCalls++; MooeeInvTime-=usecond(); @@ -170,13 +178,15 @@ void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField & } template -void CayleyFermion5D::MooeeInvDag 
(const FermionField &psi, FermionField &chi) +void CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i) { - GridBase *grid=psi.Grid(); + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); int Ls=this->Ls; + auto psi = psi_i.View(); + auto chi = chi_i.View(); assert(psi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard()=psi.Checkerboard(); std::vector ueec(Ls); std::vector deec(Ls); diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 04732567..a666dc16 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -51,14 +51,18 @@ void CayleyFermion5D::MooeeInv(const FermionField &psi, FermionField &chi) this->MooeeInternal(psi,chi,DaggerNo,InverseYes); } template -void CayleyFermion5D::M5D(const FermionField &psi, - const FermionField &phi, - FermionField &chi, +void CayleyFermion5D::M5D(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper) { - GridBase *grid=psi.Grid(); + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd= Simd::Nsimd(); @@ -70,8 +74,6 @@ void CayleyFermion5D::M5D(const FermionField &psi, assert(Ls/LLs==nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard()=psi.Checkerboard(); - // just directly address via type pun typedef typename Simd::scalar_type scalar_type; scalar_type * u_p = (scalar_type *)&u[0]; @@ -124,7 +126,7 @@ void CayleyFermion5D::M5D(const FermionField &psi, } #else for(int v=0;v::M5D(const FermionField &psi, } template -void CayleyFermion5D::M5Ddag(const FermionField &psi, - const FermionField &phi, - FermionField &chi, +void CayleyFermion5D::M5Ddag(const FermionField &psi_i, + const 
FermionField &phi_i, + FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper) { - GridBase *grid=psi.Grid(); + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi=psi_i.View(); + auto phi=phi_i.View(); + auto chi=chi_i.View(); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd= Simd::Nsimd(); @@ -214,8 +220,6 @@ void CayleyFermion5D::M5Ddag(const FermionField &psi, assert(Ls/LLs==nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard()=psi.Checkerboard(); - // just directly address via type pun typedef typename Simd::scalar_type scalar_type; scalar_type * u_p = (scalar_type *)&u[0]; @@ -339,11 +343,13 @@ void CayleyFermion5D::M5Ddag(const FermionField &psi, #endif template -void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionField &chi, +void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i, int LLs, int site, Vector > &Matp, Vector > &Matm) { + auto psi = psi_i.View(); + auto chi = chi_i.View(); #ifndef AVX512 { SiteHalfSpinor BcastP; @@ -513,11 +519,14 @@ void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionFie // Z-mobius version template -void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionField &chi, +void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i, int LLs, int site, Vector > &Matp, Vector > &Matm) { #ifndef AVX512 { + auto psi = psi_i.View(); + auto chi = chi_i.View(); + SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; SiteHalfSpinor SiteChiP; @@ -761,11 +770,12 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) { + chi.Checkerboard()=psi.Checkerboard(); + int Ls=this->Ls; int LLs = psi.Grid()->_rdimensions[0]; int vol = psi.Grid()->oSites()/LLs; - chi.Checkerboard()=psi.Checkerboard(); Vector > Matp; Vector > Matm; 
diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc b/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc index 7fa9ce9e..58aee4ff 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc +++ b/lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc @@ -40,18 +40,20 @@ NAMESPACE_BEGIN(Grid); // Pminus fowards // Pplus backwards.. template -void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper) { + chi_i.Checkerboard() = psi_i.Checkerboard(); int Ls = this->Ls; - GridBase* grid = psi.Grid(); - + GridBase* grid = psi_i.Grid(); + auto phi = phi_i.View(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; this->M5Dtime -= usecond(); - + thread_loop( (int ss=0; ssoSites(); ss+=Ls),{ // adds Ls for(int s=0; s::M5D(const FermionField& psi, const FermionFiel } template -void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper) { + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); int Ls = this->Ls; - GridBase* grid = psi.Grid(); + + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard()=psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; @@ -116,16 +122,16 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const 
FermionF } template -void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) +void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionField& chi_i) { - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi=psi_i.View(); + auto chi=chi_i.View(); int Ls = this->Ls; - chi.Checkerboard() = psi.Checkerboard(); - this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); - thread_loop((int ss=0; ssoSites(); ss+=Ls),{ // adds Ls auto tmp1 = psi[0]; @@ -164,13 +170,15 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField } template -void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) +void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i) { - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); int Ls = this->Ls; assert(psi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); std::vector ueec(Ls); std::vector deec(Ls+1); diff --git a/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc b/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc index 3d24999c..3d20befe 100644 --- a/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc +++ b/lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc @@ -52,10 +52,15 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField } template -void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper) { - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + 
auto phi = phi_i.View(); + auto chi = chi_i.View(); + int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd = Simd::Nsimd(); @@ -67,8 +72,6 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionFiel assert(Ls/LLs == nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); - // just directly address via type pun typedef typename Simd::scalar_type scalar_type; scalar_type* u_p = (scalar_type*) &u[0]; @@ -197,10 +200,15 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionFiel } template -void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper) { - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd = Simd::Nsimd(); @@ -212,8 +220,6 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionF assert(Ls/LLs == nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); - // just directly address via type pun typedef typename Simd::scalar_type scalar_type; scalar_type* u_p = (scalar_type*) &u[0]; @@ -342,9 +348,12 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionF #endif template -void DomainWallEOFAFermion::MooeeInternalAsm(const FermionField& psi, FermionField& chi, +void DomainWallEOFAFermion::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i, int LLs, int site, Vector >& Matp, Vector >& Matm) { + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); #ifndef AVX512 { 
SiteHalfSpinor BcastP; @@ -532,12 +541,11 @@ void DomainWallEOFAFermion::MooeeInternalZAsm(const FermionField& psi, Fer template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) { + chi.Checkerboard() = psi.Checkerboard(); int Ls = this->Ls; int LLs = psi.Grid()->_rdimensions[0]; int vol = psi.Grid()->oSites()/LLs; - chi.Checkerboard() = psi.Checkerboard(); - Vector > Matp; Vector > Matm; Vector > *_Matp; diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 9cd1639a..13803285 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -149,8 +149,8 @@ public: typedef typename Impl::Compressor Compressor; \ typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::ImplParams ImplParams; \ - typedef typename Impl::Coeff_t Coeff_t; \ - + typedef typename Impl::Coeff_t Coeff_t; + #define INHERIT_IMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base) \ INHERIT_FIMPL_TYPES(Base) @@ -267,12 +267,14 @@ public: int Ls=Btilde.Grid()->_fdimensions[0]; GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - + auto tmp_v = tmp.View(); + auto Btilde_v = Btilde.View(); + auto Atilde_v = Atilde.View(); thread_loop( (int sss=0;sssoSites();sss++),{ int sU=sss; for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here + tmp_v[sU] = tmp_v[sU]+ traceIndex(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here } }); PokeIndex(mat,tmp,mu); @@ -499,13 +501,10 @@ public: const int Nsimd =vector_type::Nsimd(); - // const int Nsimd = grid->Nsimd(); - - GridBase *grid= St.Grid(); int direction = St._directions[mu]; int distance = St._distances[mu]; int ptype = St._permute_type[mu]; - int sl = grid->_simd_layout[direction]; + int sl = St._simd_layout[direction]; // Fixme X.Y.Z.T hardcode in stencil int mmu = mu % Nd; @@ -524,7 +523,7 @@ public: extract(chi,vals); for(int s=0;siCoorFromIindex(icoor,s); + 
St.iCoorFromIindex(icoor,s); assert((icoor[direction]==0)||(icoor[direction]==1)); @@ -592,9 +591,13 @@ public: Uconj = where(coor==neglink,-Uconj,Uconj); } - thread_loop( (auto ss=U.begin();ss(outerProduct(Btilde, A)); - thread_loop((auto ss = tmp.begin(); ss < tmp.end(); ss++), { - link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); + auto link_v = link.View(); + auto tmp_v = tmp.View(); + thread_loop((auto ss = tmp_v.begin(); ss < tmp_v.end(); ss++), { + link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); }); PokeIndex(mat, link, mu); return; @@ -641,11 +645,14 @@ public: GaugeLinkField tmp(mat.Grid()); tmp = Zero(); + auto tmp_v = tmp.View(); + auto Atilde_v = Atilde.View(); + auto Btilde_v = Btilde.View(); thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{ for (int s = 0; s < Ls; s++) { int sF = s + Ls * ss; - auto ttmp = traceIndex(outerProduct(Btilde[sF], Atilde[sF])); - tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); + auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); + tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); } }); PokeIndex(mat, tmp, mu); diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc index da68be21..05be76b0 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion.cc @@ -249,8 +249,12 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge //////////////////////// // Call the single hop //////////////////////// + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto B_v = B.View(); + auto Btilde_v = Btilde.View(); thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), { - Kernels::DhopDirK(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1); + Kernels::DhopDirK(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); }); // Force in three link terms @@ -360,9 +364,12 @@ void ImprovedStaggeredFermion::DhopDir(const 
FermionField &in, FermionFiel Compressor compressor; Stencil.HaloExchange(in, compressor); - + auto Umu_v = Umu.View(); + auto UUUmu_v = UUUmu.View(); + auto in_v = in.View(); + auto out_v = out.View(); thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , { - Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp); + Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); }); }; @@ -377,13 +384,17 @@ void ImprovedStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder Compressor compressor; st.HaloExchange(in, compressor); + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); if (dag == DaggerYes) { thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), { - Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); + Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); }); } else { thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), { - Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); + Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); }); } }; diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc index 370d8606..f4387cb2 100644 --- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc +++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc @@ -171,12 +171,15 @@ void ImprovedStaggeredFermion5D::DhopDir(const FermionField &in, FermionFi Compressor compressor; Stencil.HaloExchange(in,compressor); - + auto Umu_v = Umu.View(); + auto UUUmu_v = UUUmu.View(); + auto in_v = in.View(); + auto out_v = out.View(); thread_loop( (int ss=0;ssoSites();ss++),{ for(int s=0;s::DhopInternal(StencilImpl & st, LebesgueOr Compressor compressor; int LLs = in.Grid()->_rdimensions[0]; - - DhopTotalTime -= usecond(); DhopCommTime -= usecond(); st.HaloExchange(in,compressor); DhopCommTime += 
usecond(); DhopComputeTime -= usecond(); + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto out_v = out.View(); + auto in_v = in.View(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion if (dag == DaggerYes) { thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), { int sU=ss; - Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out); + Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v); }); } else { thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{ int sU=ss; - Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); + Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v); }); } DhopComputeTime += usecond(); diff --git a/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc b/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc index c730e91b..dc865b4f 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermioncache.cc @@ -35,16 +35,18 @@ See the full license in the file "LICENSE" in the top level distribution directo NAMESPACE_BEGIN(Grid); -// FIXME -- make a version of these routines with site loop outermost for cache reuse. 
template -void MobiusEOFAFermion::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, +void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper) { + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - GridBase *grid = psi.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; @@ -76,16 +78,20 @@ void MobiusEOFAFermion::M5D(const FermionField &psi, const FermionField &p } template -void MobiusEOFAFermion::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, +void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper, std::vector &shift_coeffs) { + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator - GridBase *grid = psi.Grid(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; @@ -120,14 +126,17 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi, const FermionFi } template -void MobiusEOFAFermion::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, +void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper) { + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - GridBase *grid = psi.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; @@ -159,16 +168,19 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi, const FermionField } template -void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, +void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, std::vector &lower, std::vector &diag, std::vector &upper, std::vector &shift_coeffs) { + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator - GridBase *grid = psi.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // Flops = 6.0*(Nc*Ns) *Ls*vol this->M5Dcalls++; @@ -204,14 +216,15 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi, const Fermio } template -void MobiusEOFAFermion::MooeeInv(const FermionField &psi, FermionField &chi) +void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField &chi_i) { - if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } - - GridBase *grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; + auto psi = psi_i.View(); + auto chi = chi_i.View(); - chi.Checkerboard() = psi.Checkerboard(); + if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; } this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); @@ -251,12 +264,14 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi, FermionField &ch } template -void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi, FermionField &chi) +void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i) { - GridBase *grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; + auto psi = psi_i.View(); + auto chi = chi_i.View(); - chi.Checkerboard() = psi.Checkerboard(); this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); @@ -306,14 +321,15 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi, FermionFie } template -void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi, FermionField &chi) +void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i) { - if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } + if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; } - GridBase *grid = psi.Grid(); + 
chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - - chi.Checkerboard() = psi.Checkerboard(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); @@ -353,12 +369,14 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi, FermionField } template -void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi, FermionField &chi) +void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i) { - GridBase *grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); int Ls = this->Ls; - chi.Checkerboard() = psi.Checkerboard(); this->MooeeInvCalls++; this->MooeeInvTime -= usecond(); diff --git a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc index 330dab36..97001033 100644 --- a/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc +++ b/lib/qcd/action/fermion/MobiusEOFAFermionvec.cc @@ -63,10 +63,14 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, Fermion } template -void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +void MobiusEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper) { - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd = Simd::Nsimd(); @@ -78,8 +82,6 @@ void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& p assert(Ls/LLs == nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); - // just 
directly address via type pun typedef typename Simd::scalar_type scalar_type; scalar_type* u_p = (scalar_type*) &u[0]; @@ -208,11 +210,14 @@ void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& p } template -void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, +void MobiusEOFAFermion::M5D_shift(const FermionField& psi_i, const FermionField& phi_i, + FermionField& chi_i, std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs) { #if 0 + auto & psi = psi_i; + auto & phi = phi_i; + auto & chi = chi_i; this->M5D(psi, phi, chi, lower, diag, upper); @@ -225,8 +230,11 @@ void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionFi } #else - - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd = Simd::Nsimd(); @@ -239,7 +247,6 @@ void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionFi assert(Ls/LLs == nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // just directly address via type pun typedef typename Simd::scalar_type scalar_type; @@ -389,10 +396,14 @@ void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionFi } template -void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) +void MobiusEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper) { - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + 
auto chi = chi_i.View(); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd = Simd::Nsimd(); @@ -404,7 +415,6 @@ void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField assert(Ls/LLs == nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // just directly address via type pun typedef typename Simd::scalar_type scalar_type; @@ -531,12 +541,14 @@ void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField } template -void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, +void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, + std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs) { #if 0 - + auto & psi = psi_i; + auto & phi = phi_i; + auto & chi = chi_i; this->M5Ddag(psi, phi, chi, lower, diag, upper); // FIXME: possible gain from vectorizing shift operation as well? 
@@ -548,8 +560,11 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const Fermio } #else - - GridBase* grid = psi.Grid(); + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd = Simd::Nsimd(); @@ -562,7 +577,6 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const Fermio assert(Ls/LLs == nsimd); assert(phi.Checkerboard() == psi.Checkerboard()); - chi.Checkerboard() = psi.Checkerboard(); // just directly address via type pun typedef typename Simd::scalar_type scalar_type; @@ -717,9 +731,11 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const Fermio #endif template -void MobiusEOFAFermion::MooeeInternalAsm(const FermionField& psi, FermionField& chi, +void MobiusEOFAFermion::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i, int LLs, int site, Vector >& Matp, Vector >& Matm) { + auto psi = psi_i.View(); + auto chi = chi_i.View(); #ifndef AVX512 { SiteHalfSpinor BcastP; @@ -909,12 +925,12 @@ void MobiusEOFAFermion::MooeeInternalZAsm(const FermionField& psi, Fermion template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) { + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; int LLs = psi.Grid()->_rdimensions[0]; int vol = psi.Grid()->oSites()/LLs; - chi.Checkerboard() = psi.Checkerboard(); - Vector> Matp; Vector> Matm; Vector>* _Matp; diff --git a/lib/qcd/action/fermion/StaggeredKernels.cc b/lib/qcd/action/fermion/StaggeredKernels.cc index 832a3066..0dcb4ff7 100644 --- a/lib/qcd/action/fermion/StaggeredKernels.cc +++ b/lib/qcd/action/fermion/StaggeredKernels.cc @@ -40,9 +40,9 @@ StaggeredKernels::StaggeredKernels(const ImplParams &p) : Base(p){}; //////////////////////////////////////////// template -void StaggeredKernels::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, 
DoubledGaugeField &U, +void StaggeredKernels::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor *buf, int sF, - int sU, const FermionField &in, SiteSpinor &out,int threeLink) { + int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) { const SiteSpinor *chi_p; SiteSpinor chi; SiteSpinor Uchi; @@ -183,9 +183,9 @@ void StaggeredKernels::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D }; template -void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, +void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out) { + const FermionFieldView &in, FermionFieldView &out) { SiteSpinor naik; SiteSpinor naive; int oneLink =0; @@ -221,9 +221,9 @@ void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou }; template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, +void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { int oneLink =0; int threeLink=1; @@ -258,8 +258,8 @@ void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double }; template -void StaggeredKernels::DhopDirK( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out, int dir, int disp) +void StaggeredKernels::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp) { // Disp should be either +1,-1,+3,-3 // What about "dag" ? 
diff --git a/lib/qcd/action/fermion/StaggeredKernels.h b/lib/qcd/action/fermion/StaggeredKernels.h index e07dd402..9b5e618b 100644 --- a/lib/qcd/action/fermion/StaggeredKernels.h +++ b/lib/qcd/action/fermion/StaggeredKernels.h @@ -46,30 +46,34 @@ public: INHERIT_IMPL_TYPES(Impl); typedef FermionOperator Base; + + typedef typename ViewMap::Type FermionFieldView; + typedef typename ViewMap::Type DoubledGaugeFieldView; + public: - void DhopDirK(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp); + void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); - void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf, - int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink); + void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink); - void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf, - int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink); + void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, SiteSpinor&out,int threeLink); - void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf, - int LLs, int sU, const FermionField &in, FermionField &out, int dag); + void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,SiteSpinor * buf, + int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag); - void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField 
&UUU, SiteSpinor * buf, - int LLs, int sU, const FermionField &in, FermionField &out); + void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, + int LLs, int sU, const FermionFieldView &in, FermionFieldView &out); - void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, - int LLs, int sU, const FermionField &in, FermionField &out); + void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, + int LLs, int sU, const FermionFieldView &in, FermionFieldView &out); public: diff --git a/lib/qcd/action/fermion/StaggeredKernelsAsm.cc b/lib/qcd/action/fermion/StaggeredKernelsAsm.cc index d7fa50c5..684c0f79 100644 --- a/lib/qcd/action/fermion/StaggeredKernelsAsm.cc +++ b/lib/qcd/action/fermion/StaggeredKernelsAsm.cc @@ -580,10 +580,10 @@ NAMESPACE_BEGIN(Grid); template void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, - DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { assert(0); }; @@ -644,10 +644,10 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, // This is the single precision 5th direction vectorised kernel #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, - DoubledGaugeField &UUU, + DoubledGaugeFieldView 
&U, + DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; @@ -694,10 +694,10 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, - DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; @@ -775,10 +775,10 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, - DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; @@ -840,10 +840,10 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, - DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; @@ -905,10 +905,10 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \ template void CLASS::FUNC(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeField &U, \ - DoubledGaugeField &UUU, \ + DoubledGaugeFieldView &U, \ + DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, \ - 
int sU, const FermionField &in, FermionField &out); + int sU, const FermionFieldView &in, FermionFieldView &out); KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD); KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF); diff --git a/lib/qcd/action/fermion/StaggeredKernelsHand.cc b/lib/qcd/action/fermion/StaggeredKernelsHand.cc index c2c99534..5e18f0ab 100644 --- a/lib/qcd/action/fermion/StaggeredKernelsHand.cc +++ b/lib/qcd/action/fermion/StaggeredKernelsHand.cc @@ -89,9 +89,9 @@ Author: paboyle NAMESPACE_BEGIN(Grid); template -void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, +void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out, int dag) + int sU, const FermionFieldView &in, FermionFieldView &out, int dag) { SiteSpinor naik; SiteSpinor naive; @@ -110,9 +110,9 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, Do } template -void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor *buf, int sF, - int sU, const FermionField &in, SiteSpinor &out,int threeLink) + int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -298,14 +298,14 @@ void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l #define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeField &U,DoubledGaugeField &UUU, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, \ - int sU, const FermionField &in, FermionField &out, int dag); + int sU, const FermionFieldView 
&in, FermionFieldView &out, int dag); #define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \ - template void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \ + template void StaggeredKernels::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, \ SiteSpinor *buf, int sF, \ - int sU, const FermionField &in, SiteSpinor &out,int threeLink) ; + int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) ; DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD); DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF); DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD); diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index d92cdf55..21a919cf 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -216,8 +216,11 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, //////////////////////// // Call the single hop //////////////////////// + auto U_v = U.View(); + auto B_v = B.View(); + auto Btilde_v = Btilde.View(); thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{ - Kernels::DhopDirK(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma); + Kernels::DhopDirK(st, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma); }); ////////////////////////////////////////////////// @@ -316,9 +319,11 @@ void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out, Compressor compressor(dag); Stencil.HaloExchange(in, compressor); - + auto in_v = in.View(); + auto out_v = out.View(); + auto Umu_v = Umu.View(); thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{ - Kernels::DhopDirK(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma); + Kernels::DhopDirK(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma); }); }; @@ -333,13 +338,16 @@ void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, st.HaloExchange(in, compressor); int Opt = 
WilsonKernelsStatic::Opt; + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v= out.View(); if (dag == DaggerYes) { - accelerator_loop( sss,in, { - Kernels::DhopSiteDag(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); + accelerator_loop( sss,in_v, { + Kernels::DhopSiteDag(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v); }); } else { - accelerator_loop( sss,in, { - Kernels::DhopSite(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); + accelerator_loop( sss,in_v, { + Kernels::DhopSite(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v); }); } }; @@ -367,15 +375,21 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, // Inefficient comms method but not performance critical. tmp1 = Cshift(q_in_1, mu, 1); tmp2 = Cshift(q_in_2, mu, 1); + auto tmp1_v = tmp1.View(); + auto tmp2_v = tmp2.View(); + auto q_in_1_v=q_in_1.View(); + auto q_in_2_v=q_in_2.View(); + auto q_out_v = q_out.View(); + auto Umu_v = Umu.View(); thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), { - Kernels::ContractConservedCurrentSiteFwd(tmp1[sU], - q_in_2[sU], - q_out[sU], - Umu, sU, mu); - Kernels::ContractConservedCurrentSiteBwd(q_in_1[sU], - tmp2[sU], - q_out[sU], - Umu, sU, mu); + Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU], + q_in_2_v[sU], + q_out_v[sU], + Umu_v, sU, mu); + Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU], + tmp2_v[sU], + q_out_v[sU], + Umu_v, sU, mu); }); } @@ -415,34 +429,40 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, tmp = ph*q_in; tmpBwd = Cshift(tmp, mu, -1); + auto coords_v = coords.View(); + auto tmpFwd_v = tmpFwd.View(); + auto tmpBwd_v = tmpBwd.View(); + auto Umu_v = Umu.View(); + auto q_out_v = q_out.View(); + thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), { // Compute the sequential conserved current insertion only if our simd // object contains a timeslice we need. 
- vInteger t_mask = ((coords[sU] >= tmin) && - (coords[sU] <= tmax)); + vInteger t_mask = ((coords_v[sU] >= tmin) && + (coords_v[sU] <= tmax)); Integer timeSlices = Reduce(t_mask); if (timeSlices > 0) { - Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sU], - q_out[sU], - Umu, sU, mu, t_mask); + Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU], + q_out_v[sU], + Umu_v, sU, mu, t_mask); } // Repeat for backward direction. - t_mask = ((coords[sU] >= (tmin + tshift)) && - (coords[sU] <= (tmax + tshift))); + t_mask = ((coords_v[sU] >= (tmin + tshift)) && + (coords_v[sU] <= (tmax + tshift))); //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 )); + if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 )); timeSlices = Reduce(t_mask); if (timeSlices > 0) { - Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sU], - q_out[sU], - Umu, sU, mu, t_mask); + Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU], + q_out_v[sU], + Umu_v, sU, mu, t_mask); } }); } diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index d50c73aa..8c0f5a90 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -244,11 +244,14 @@ void WilsonFermion5D::DhopDir(const FermionField &in, FermionField &out,in assert(dirdisp<=7); assert(dirdisp>=0); + auto Umu_v = Umu.View(); + auto in_v = in.View(); + auto out_v = out.View(); thread_loop( (int ss=0;ssoSites();ss++),{ for(int s=0;s::DerivInternal(StencilImpl & st, Atilde=A; int LLs = B.Grid()->_rdimensions[0]; - DerivComputeTime-=usecond(); for (int mu = 0; mu < Nd; mu++) { //////////////////////////////////////////////////////////////////////// @@ -293,15 +295,20 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, //////////////////////// DerivDhopComputeTime -= usecond(); + auto U_v = U.View(); + auto Btilde_v = Btilde.View(); 
+ auto B_v = B.View(); + int Bsites = B.Grid()->oSites(); + int Usites = U.Grid()->oSites(); thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{ for (int s = 0; s < Ls; s++) { int sU = sss; int sF = s + Ls * sU; - assert(sF < B.Grid()->oSites()); - assert(sU < U.Grid()->oSites()); + assert(sF < Bsites); + assert(sU < Usites); - Kernels::DhopDirK(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); + Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma); //////////////////////////// // spin trace outer product @@ -406,6 +413,9 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg ////////////////////////////////////////////////////////////////////////////////////////////////////// // Ugly explicit thread mapping introduced for OPA reasons. ////////////////////////////////////////////////////////////////////////////////////////////////////// + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v = out.View(); #pragma omp parallel reduction(max:ctime) reduction(max:ptime) { int tid = omp_get_thread_num(); @@ -435,13 +445,13 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg for (int ss = myblock; ss < myblock+myn; ++ss) { int sU = ss; int sF = LLs * sU; - Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0); } } else { for (int ss = myblock; ss < myblock+myn; ++ss) { int sU = ss; int sF = LLs * sU; - Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); + Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0); } } ptime = usecond() - start; @@ -470,14 +480,14 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg thread_loop( (int ss = 0; ss < sz; ss++) ,{ int sU = st.surface_list[ss]; int sF = LLs * sU; - Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); + 
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1); }); } else { int sz=st.surface_list.size(); thread_loop( (int ss = 0; ss < sz; ss++) ,{ int sU = st.surface_list[ss]; int sF = LLs * sU; - Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); + Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1); }); } DhopComputeTime2+=usecond(); @@ -505,17 +515,20 @@ void WilsonFermion5D::DhopInternalSerialComms(StencilImpl & st, LebesgueOr // Dhop takes the 4d grid from U, and makes a 5d index for fermion int Opt = WilsonKernelsStatic::Opt; + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v = out.View(); if (dag == DaggerYes) { - accelerator_loop( ss, U, { + accelerator_loop( ss, U_v, { int sU = ss; int sF = LLs * sU; - Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); + Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v); }); } else { - accelerator_loop( ss, U , { + accelerator_loop( ss, U_v , { int sU = ss; int sF = LLs * sU; - Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); + Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v); }); } DhopComputeTime+=usecond(); @@ -738,10 +751,17 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, unsigned int LLs = q_in_1.Grid()->_rdimensions[0]; q_out = Zero(); + auto q_in_1_v = q_in_1.View(); + auto q_in_2_v = q_in_2.View(); + auto tmp1_v = tmp1.View(); + auto tmp2_v = tmp2.View(); + auto q_out_v = q_out.View(); + auto Umu_v = Umu.View(); // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s), // q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one. 
tmp1 = Cshift(q_in_1, mu + 1, 1); tmp2 = Cshift(q_in_2, mu + 1, 1); + thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), { unsigned int sF1 = sU * LLs; unsigned int sF2 = (sU + 1) * LLs - 1; @@ -755,20 +775,20 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, // If vectorised in 5th dimension, reverse q2 vector to match up // sites correctly. if (Impl::LsVectorised) { - REVERSE_LS(q_in_2[sF2], qSite2, Ls / LLs); - REVERSE_LS(tmp2[sF2], qmuSite2, Ls / LLs); + REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs); + REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs); } else { - qSite2 = q_in_2[sF2]; - qmuSite2 = tmp2[sF2]; + qSite2 = q_in_2_v[sF2]; + qmuSite2 = tmp2_v[sF2]; } - Kernels::ContractConservedCurrentSiteFwd(tmp1[sF1], + Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1], qSite2, - q_out[sU], - Umu, sU, mu, axial_sign); - Kernels::ContractConservedCurrentSiteBwd(q_in_1[sF1], + q_out_v[sU], + Umu_v, sU, mu, axial_sign); + Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1], qmuSite2, - q_out[sU], - Umu, sU, mu, axial_sign); + q_out_v[sU], + Umu_v, sU, mu, axial_sign); sF1++; sF2--; } @@ -808,7 +828,7 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, q_out = Zero(); LatticeInteger coords(_FourDimGrid); LatticeCoordinate(coords, Tp); - + auto coords_v = coords.View(); // Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu // by one. tmp = Cshift(q_in, mu + 1, 1); @@ -816,11 +836,15 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, tmp = ph*q_in; tmpBwd = Cshift(tmp, mu + 1, -1); + auto tmpBwd_v = tmpBwd.View(); + auto tmpFwd_v = tmpFwd.View(); + auto q_out_v = q_out.View(); + auto Umu_v = Umu.View(); thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{ // Compute the sequential conserved current insertion only if our simd // object contains a timeslice we need. 
- vInteger t_mask = ((coords[sU] >= tmin) && - (coords[sU] <= tmax)); + vInteger t_mask = ((coords_v[sU] >= tmin) && + (coords_v[sU] <= tmax)); Integer timeSlices = Reduce(t_mask); if (timeSlices > 0) { @@ -828,20 +852,20 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, unsigned int sF = sU * LLs; for (unsigned int s = 0; s < LLs; ++s) { bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); - Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sF], - q_out[sF], Umu, sU, + Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sF], + q_out_v[sF], Umu_v, sU, mu, t_mask, axial_sign); ++sF; } } // Repeat for backward direction. - t_mask = ((coords[sU] >= (tmin + tshift)) && - (coords[sU] <= (tmax + tshift))); + t_mask = ((coords_v[sU] >= (tmin + tshift)) && + (coords_v[sU] <= (tmax + tshift))); //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 )); + if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 )); timeSlices = Reduce(t_mask); @@ -849,8 +873,8 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, unsigned int sF = sU * LLs; for (unsigned int s = 0; s < LLs; ++s) { bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); - Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sF], - q_out[sF], Umu, sU, + Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sF], + q_out_v[sF], Umu_v, sU, mu, t_mask, axial_sign); ++sF; } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 57e5a017..2e9e4fb3 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -36,7 +36,7 @@ int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric; int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute; template -accelerator WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; 
+WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; //////////////////////////////////////////// // Generic implementation; move to different file? @@ -103,9 +103,9 @@ accelerator WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; // All legs kernels ; comms then compute //////////////////////////////////////////////////////////////////// template -accelerator void WilsonKernels::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +accelerator void WilsonKernels::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; @@ -127,9 +127,9 @@ accelerator void WilsonKernels::GenericDhopSiteDag(StencilImpl &st, Lebesg }; template -accelerator void WilsonKernels::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +accelerator void WilsonKernels::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; @@ -153,9 +153,9 @@ accelerator void WilsonKernels::GenericDhopSite(StencilImpl &st, LebesgueO // Interior kernels //////////////////////////////////////////////////////////////////// template -accelerator void WilsonKernels::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +accelerator void WilsonKernels::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; @@ -178,9 +178,9 @@ accelerator void WilsonKernels::GenericDhopSiteDagInt(StencilImpl &st, Leb }; 
template -accelerator void WilsonKernels::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +accelerator void WilsonKernels::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; @@ -204,9 +204,9 @@ accelerator void WilsonKernels::GenericDhopSiteInt(StencilImpl &st, Lebesg // Exterior kernels //////////////////////////////////////////////////////////////////// template -accelerator void WilsonKernels::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +accelerator void WilsonKernels::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { // SiteHalfSpinor tmp; // SiteHalfSpinor chi; @@ -231,9 +231,9 @@ accelerator void WilsonKernels::GenericDhopSiteDagExt(StencilImpl &st, Leb }; template -accelerator void WilsonKernels::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, +accelerator void WilsonKernels::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { // SiteHalfSpinor tmp; // SiteHalfSpinor chi; @@ -258,9 +258,9 @@ accelerator void WilsonKernels::GenericDhopSiteExt(StencilImpl &st, Lebesg }; template -accelerator void WilsonKernels::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out, int dir, int gamma) { - +accelerator void WilsonKernels::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, 
FermionFieldView &out, int dir, int gamma) +{ SiteHalfSpinor tmp; SiteHalfSpinor chi; SiteSpinor result; @@ -300,23 +300,23 @@ void WilsonKernels::ContractConservedCurrentSiteFwd( const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign) { - SitePropagator result, tmp; - Gamma g5(Gamma::Algebra::Gamma5); - Impl::multLinkProp(tmp, U[sU], q_in_1, mu); - result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); - if (switch_sign) - { - q_out -= result; - } - else - { - q_out += result; - } + SitePropagator result, tmp; + Gamma g5(Gamma::Algebra::Gamma5); + + Impl::multLinkProp(tmp, U[sU], q_in_1, mu); + + result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); + + if (switch_sign) { + q_out -= result; + } else { + q_out += result; + } } /******************************************************************************* @@ -330,23 +330,22 @@ void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign) { - SitePropagator result, tmp; - Gamma g5(Gamma::Algebra::Gamma5); - Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd); - result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); - if (switch_sign) - { - q_out += result; - } - else - { - q_out -= result; - } + SitePropagator result, tmp; + Gamma g5(Gamma::Algebra::Gamma5); + + Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd); + + result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); + if (switch_sign) { + q_out += result; + } else { + q_out -= result; + } } // G-parity requires more specialised implementation. 
@@ -356,7 +355,7 @@ void WilsonKernels::ContractConservedCurrentSiteFwd( \ const SitePropagator &q_in_1, \ const SitePropagator &q_in_2, \ SitePropagator &q_out, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ unsigned int sU, \ unsigned int mu, \ bool switch_sign) \ @@ -368,7 +367,7 @@ void WilsonKernels::ContractConservedCurrentSiteBwd( \ const SitePropagator &q_in_1, \ const SitePropagator &q_in_2, \ SitePropagator &q_out, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ unsigned int mu, \ unsigned int sU, \ bool switch_sign) \ @@ -391,27 +390,25 @@ NO_CURR_SITE(GparityWilsonImplDF); template void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, vInteger t_mask, bool switch_sign) { - SitePropagator result; - Impl::multLinkProp(result, U[sU], q_in, mu); - result = WilsonCurrentFwd(result, mu); + SitePropagator result; + + Impl::multLinkProp(result, U[sU], q_in, mu); + result = WilsonCurrentFwd(result, mu); - // Zero any unwanted timeslice entries. - result = predicatedWhere(t_mask, result, 0.*result); - - if (switch_sign) - { - q_out -= result; - } - else - { - q_out += result; - } + // Zero any unwanted timeslice entries. 
+ result = predicatedWhere(t_mask, result, 0.*result); + + if (switch_sign) { + q_out -= result; + } else { + q_out += result; + } } /******************************************************************************* @@ -423,27 +420,24 @@ void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, template void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, vInteger t_mask, bool switch_sign) { - SitePropagator result; - Impl::multLinkProp(result, U[sU], q_in, mu + Nd); - result = WilsonCurrentBwd(result, mu); + SitePropagator result; + Impl::multLinkProp(result, U[sU], q_in, mu + Nd); + result = WilsonCurrentBwd(result, mu); - // Zero any unwanted timeslice entries. - result = predicatedWhere(t_mask, result, 0.*result); - - if (switch_sign) - { - q_out += result; - } - else - { - q_out -= result; - } + // Zero any unwanted timeslice entries. + result = predicatedWhere(t_mask, result, 0.*result); + + if (switch_sign) { + q_out += result; + } else { + q_out -= result; + } } FermOpTemplateInstantiate(WilsonKernels); diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index b0aa7db1..0f4b387e 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -50,13 +50,16 @@ public: INHERIT_IMPL_TYPES(Impl); typedef FermionOperator Base; + + typedef typename ViewMap::Type FermionFieldView; + typedef typename ViewMap::Type DoubledGaugeFieldView; public: template accelerator typename std::enable_if::type - DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) + DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const 
FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { bgq_l1p_optimisation(1); switch(Opt) { @@ -99,8 +102,8 @@ public: template accelerator typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type - DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) { + DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) { // no kernel choice for (int site = 0; site < Nsite; site++) { for (int s = 0; s < Ls; s++) { @@ -116,8 +119,8 @@ public: template accelerator typename std::enable_if::type - DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) + DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { bgq_l1p_optimisation(1); switch(Opt) { @@ -161,8 +164,8 @@ public: template accelerator typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type - DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { + DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) { for (int site = 0; site < Nsite; 
site++) { for (int s = 0; s < Ls; s++) { @@ -176,8 +179,8 @@ public: } } - accelerator void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); + accelerator void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); ////////////////////////////////////////////////////////////////////////////// // Utilities for inserting Wilson conserved current. @@ -185,27 +188,27 @@ public: void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign = false); void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign = false); void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, vInteger t_mask, bool switch_sign = false); void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, vInteger t_mask, @@ -213,60 +216,60 @@ public: private: // Specialised variants - accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor 
* buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out); + accelerator void AsmDhopSite(StencilImpl &st, 
LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); - accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out); + accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); - accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out); + accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); - accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out); + accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); - accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out); + accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); - accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out); + accelerator void 
AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); - accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, 
FermionFieldView &out); - accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); public: diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index ca14bac3..3dd46934 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -38,43 +38,43 @@ NAMESPACE_BEGIN(Grid); // Default to no assembler implementation /////////////////////////////////////////////////////////// template void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template 
void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } template void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) { assert(0); } @@ -83,21 +83,21 @@ WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl #include #define INSTANTIATE_ASM(A)\ -template void WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\ \ -template void WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ -template void 
WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\ +template void WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\ \ -template void WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ -template void WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\ +template void WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\ \ -template void WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\ INSTANTIATE_ASM(WilsonImplF); INSTANTIATE_ASM(WilsonImplD); diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc 
b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 9a58a494..b4e24b76 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -573,8 +573,8 @@ Author: paboyle NAMESPACE_BEGIN(Grid); template void -WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... typedef typename Simd::scalar_type S; @@ -600,8 +600,8 @@ WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge } template -void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -626,8 +626,8 @@ void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub } template void -WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
typedef typename Simd::scalar_type S; @@ -654,8 +654,8 @@ WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa } template -void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -681,8 +681,8 @@ void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D } template void -WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
typedef typename Simd::scalar_type S; @@ -711,8 +711,8 @@ WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa } template -void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -746,58 +746,58 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D template<> void \ WilsonKernels::HandDhopSite(StencilImpl &st, \ LebesgueOrder &lo, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ SiteHalfSpinor *buf, \ int sF,int sU, \ - const FermionField &in, \ - FermionField &out){ assert(0); } \ + const FermionFieldView &in, \ + FermionFieldView &out){ assert(0); } \ template<> void \ WilsonKernels::HandDhopSiteDag(StencilImpl &st, \ LebesgueOrder &lo, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ SiteHalfSpinor *buf, \ int sF,int sU, \ - const FermionField &in, \ - FermionField &out){ assert(0); } \ + const FermionFieldView &in, \ + FermionFieldView &out){ assert(0); } \ template<> void \ WilsonKernels::HandDhopSiteInt(StencilImpl &st, \ LebesgueOrder &lo, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ SiteHalfSpinor *buf, \ int sF,int sU, \ - const FermionField &in, \ - FermionField &out){ assert(0); } \ + const FermionFieldView &in, \ + FermionFieldView &out){ assert(0); } \ template<> void \ WilsonKernels::HandDhopSiteExt(StencilImpl &st, \ LebesgueOrder &lo, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ SiteHalfSpinor *buf, \ int sF,int sU, \ - const FermionField &in, \ - FermionField &out){ assert(0); } \ + const FermionFieldView &in, \ + FermionFieldView &out){ assert(0); } \ template<> void \ 
WilsonKernels::HandDhopSiteDagInt(StencilImpl &st, \ LebesgueOrder &lo, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ SiteHalfSpinor *buf, \ int sF,int sU, \ - const FermionField &in, \ - FermionField &out){ assert(0); } \ + const FermionFieldView &in, \ + FermionFieldView &out){ assert(0); } \ template<> void \ WilsonKernels::HandDhopSiteDagExt(StencilImpl &st, \ LebesgueOrder &lo, \ - DoubledGaugeField &U, \ + DoubledGaugeFieldView &U, \ SiteHalfSpinor *buf, \ int sF,int sU, \ - const FermionField &in, \ - FermionField &out){ assert(0); } \ + const FermionFieldView &in, \ + FermionFieldView &out){ assert(0); } \ #define HAND_SPECIALISE_GPARITY(IMPL) \ template<> void \ - WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -812,8 +812,8 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D } \ \ template<> \ - void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -828,8 +828,8 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D } \ \ template<> void \ - WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + WilsonKernels::HandDhopSiteInt(StencilImpl 
&st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -844,8 +844,8 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D } \ \ template<> \ - void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -860,8 +860,8 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D } \ \ template<> void \ - WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -877,8 +877,8 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ } \ template<> \ - void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -904,18 +904,18 @@ HAND_SPECIALISE_GPARITY(GparityWilsonImplDF); 
////////////// Wilson ; uses this implementation ///////////////////// #define INSTANTIATE_THEM(A) \ -template void WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out);\ -template void WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out); +template void WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \ +template void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out);\ +template void WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \ +template void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView 
&out); \ +template void WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \ +template void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out); INSTANTIATE_THEM(WilsonImplF); INSTANTIATE_THEM(WilsonImplD);