1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Introduce views to prepare for accelerator offload.

The same problem probably exists for the stencil object.
This commit is contained in:
paboyle 2018-03-04 16:38:08 +00:00
parent 442b0b406c
commit 3277bda130
19 changed files with 595 additions and 457 deletions

View File

@ -38,17 +38,20 @@ NAMESPACE_BEGIN(Grid);
// Pminus forwards // Pminus forwards
// Pplus backwards.. // Pplus backwards..
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi, const FermionField &phi_i,
FermionField &chi, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag, std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper) std::vector<Coeff_t> &upper)
{ {
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls =this->Ls; int Ls =this->Ls;
GridBase *grid=psi.Grid();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++; M5Dcalls++;
M5Dtime-=usecond(); M5Dtime-=usecond();
@ -81,17 +84,20 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi, const FermionField &phi_i,
FermionField &chi, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag, std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper) std::vector<Coeff_t> &upper)
{ {
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls =this->Ls; int Ls =this->Ls;
GridBase *grid=psi.Grid();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++; M5Dcalls++;
@ -125,12 +131,14 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
{ {
GridBase *grid=psi.Grid(); chi_i.Checkerboard()=psi_i.Checkerboard();
int Ls=this->Ls; GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
chi.Checkerboard()=psi.Checkerboard(); int Ls=this->Ls;
MooeeInvCalls++; MooeeInvCalls++;
MooeeInvTime-=usecond(); MooeeInvTime-=usecond();
@ -170,13 +178,15 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
{ {
GridBase *grid=psi.Grid(); chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
int Ls=this->Ls; int Ls=this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(psi.Checkerboard() == psi.Checkerboard()); assert(psi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
std::vector<Coeff_t> ueec(Ls); std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls); std::vector<Coeff_t> deec(Ls);

View File

@ -51,14 +51,18 @@ void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
this->MooeeInternal(psi,chi,DaggerNo,InverseYes); this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi, const FermionField &phi_i,
FermionField &chi, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag, std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper) std::vector<Coeff_t> &upper)
{ {
GridBase *grid=psi.Grid(); chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
const int nsimd= Simd::Nsimd(); const int nsimd= Simd::Nsimd();
@ -70,8 +74,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
assert(Ls/LLs==nsimd); assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0]; scalar_type * u_p = (scalar_type *)&u[0];
@ -195,14 +197,18 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi, const FermionField &phi_i,
FermionField &chi, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag, std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper) std::vector<Coeff_t> &upper)
{ {
GridBase *grid=psi.Grid(); chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi=psi_i.View();
auto phi=phi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
int nsimd= Simd::Nsimd(); int nsimd= Simd::Nsimd();
@ -214,8 +220,6 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
assert(Ls/LLs==nsimd); assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0]; scalar_type * u_p = (scalar_type *)&u[0];
@ -339,11 +343,13 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
#endif #endif
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi, void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site, int LLs, int site,
Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm) Vector<iSinglet<Simd> > &Matm)
{ {
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512 #ifndef AVX512
{ {
SiteHalfSpinor BcastP; SiteHalfSpinor BcastP;
@ -513,11 +519,14 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
// Z-mobius version // Z-mobius version
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi, void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm) int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
{ {
#ifndef AVX512 #ifndef AVX512
{ {
auto psi = psi_i.View();
auto chi = chi_i.View();
SiteHalfSpinor BcastP; SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM; SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP; SiteHalfSpinor SiteChiP;
@ -761,11 +770,12 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{ {
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls; int Ls=this->Ls;
int LLs = psi.Grid()->_rdimensions[0]; int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs; int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard()=psi.Checkerboard();
Vector<iSinglet<Simd> > Matp; Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm; Vector<iSinglet<Simd> > Matm;

View File

@ -40,14 +40,16 @@ NAMESPACE_BEGIN(Grid);
// Pminus forwards // Pminus forwards
// Pplus backwards.. // Pplus backwards..
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi, void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper) std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{ {
chi_i.Checkerboard() = psi_i.Checkerboard();
int Ls = this->Ls; int Ls = this->Ls;
GridBase* grid = psi.Grid(); GridBase* grid = psi_i.Grid();
auto phi = phi_i.View();
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++; this->M5Dcalls++;
this->M5Dtime -= usecond(); this->M5Dtime -= usecond();
@ -78,13 +80,17 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
} }
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi, void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper) std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{ {
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
GridBase* grid = psi.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++; this->M5Dcalls++;
@ -116,16 +122,16 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
} }
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi) void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
{ {
GridBase* grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi=psi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
chi.Checkerboard() = psi.Checkerboard();
this->MooeeInvCalls++; this->MooeeInvCalls++;
this->MooeeInvTime -= usecond(); this->MooeeInvTime -= usecond();
thread_loop((int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls thread_loop((int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
auto tmp1 = psi[0]; auto tmp1 = psi[0];
@ -164,13 +170,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
} }
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi) void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
{ {
GridBase* grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
assert(psi.Checkerboard() == psi.Checkerboard()); assert(psi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
std::vector<Coeff_t> ueec(Ls); std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls+1); std::vector<Coeff_t> deec(Ls+1);

View File

@ -52,10 +52,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
} }
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi, void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper) std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{ {
GridBase* grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd(); const int nsimd = Simd::Nsimd();
@ -67,8 +72,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
assert(Ls/LLs == nsimd); assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0]; scalar_type* u_p = (scalar_type*) &u[0];
@ -197,10 +200,15 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
} }
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi, void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper) std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{ {
GridBase* grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd(); int nsimd = Simd::Nsimd();
@ -212,8 +220,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
assert(Ls/LLs == nsimd); assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0]; scalar_type* u_p = (scalar_type*) &u[0];
@ -342,9 +348,12 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
#endif #endif
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi, void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm) int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{ {
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512 #ifndef AVX512
{ {
SiteHalfSpinor BcastP; SiteHalfSpinor BcastP;
@ -532,12 +541,11 @@ void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fer
template<class Impl> template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{ {
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0]; int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs; int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard() = psi.Checkerboard();
Vector<iSinglet<Simd> > Matp; Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm; Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp; Vector<iSinglet<Simd> > *_Matp;

View File

@ -149,7 +149,7 @@ public:
typedef typename Impl::Compressor Compressor; \ typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \ typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t; \ typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base) \
@ -267,12 +267,14 @@ public:
int Ls=Btilde.Grid()->_fdimensions[0]; int Ls=Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid()); GaugeLinkField tmp(mat.Grid());
tmp = Zero(); tmp = Zero();
auto tmp_v = tmp.View();
auto Btilde_v = Btilde.View();
auto Atilde_v = Atilde.View();
thread_loop( (int sss=0;sss<tmp.Grid()->oSites();sss++),{ thread_loop( (int sss=0;sss<tmp.Grid()->oSites();sss++),{
int sU=sss; int sU=sss;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sF = s+Ls*sU; int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here tmp_v[sU] = tmp_v[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here
} }
}); });
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
@ -499,13 +501,10 @@ public:
const int Nsimd =vector_type::Nsimd(); const int Nsimd =vector_type::Nsimd();
// const int Nsimd = grid->Nsimd();
GridBase *grid= St.Grid();
int direction = St._directions[mu]; int direction = St._directions[mu];
int distance = St._distances[mu]; int distance = St._distances[mu];
int ptype = St._permute_type[mu]; int ptype = St._permute_type[mu];
int sl = grid->_simd_layout[direction]; int sl = St._simd_layout[direction];
// Fixme X.Y.Z.T hardcode in stencil // Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd; int mmu = mu % Nd;
@ -524,7 +523,7 @@ public:
extract(chi,vals); extract(chi,vals);
for(int s=0;s<Nsimd;s++){ for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s); St.iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1)); assert((icoor[direction]==0)||(icoor[direction]==1));
@ -592,9 +591,13 @@ public:
Uconj = where(coor==neglink,-Uconj,Uconj); Uconj = where(coor==neglink,-Uconj,Uconj);
} }
thread_loop( (auto ss=U.begin();ss<U.end();ss++),{ auto U_v = U.View();
Uds[ss](0)(mu) = U[ss](); auto Uds_v = Uds.View();
Uds[ss](1)(mu) = Uconj[ss](); auto Uconj_v = Uconj.View();
auto Utmp_v= Utmp.View();
thread_loop( (auto ss=U_v.begin();ss<U_v.end();ss++),{
Uds_v[ss](0)(mu) = U_v[ss]();
Uds_v[ss](1)(mu) = Uconj_v[ss]();
}); });
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
@ -605,9 +608,8 @@ public:
Utmp = where(coor==0,Uconj,Utmp); Utmp = where(coor==0,Uconj,Utmp);
} }
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
thread_loop((auto ss=U.begin();ss<U.end();ss++),{ Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
Uds[ss](0)(mu+4) = Utmp[ss]();
}); });
Utmp = Uconj; Utmp = Uconj;
@ -615,8 +617,8 @@ public:
Utmp = where(coor==0,U,Utmp); Utmp = where(coor==0,U,Utmp);
} }
thread_loop((auto ss=U.begin();ss<U.end();ss++),{ thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
Uds[ss](1)(mu+4) = Utmp[ss](); Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
}); });
} }
@ -628,8 +630,10 @@ public:
GaugeLinkField link(mat.Grid()); GaugeLinkField link(mat.Grid());
// use lorentz for flavour as hack. // use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A)); auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
thread_loop((auto ss = tmp.begin(); ss < tmp.end(); ss++), { auto link_v = link.View();
link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); auto tmp_v = tmp.View();
thread_loop((auto ss = tmp_v.begin(); ss < tmp_v.end(); ss++), {
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
}); });
PokeIndex<LorentzIndex>(mat, link, mu); PokeIndex<LorentzIndex>(mat, link, mu);
return; return;
@ -641,11 +645,14 @@ public:
GaugeLinkField tmp(mat.Grid()); GaugeLinkField tmp(mat.Grid());
tmp = Zero(); tmp = Zero();
auto tmp_v = tmp.View();
auto Atilde_v = Atilde.View();
auto Btilde_v = Btilde.View();
thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{ thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss; int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])); auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
} }
}); });
PokeIndex<LorentzIndex>(mat, tmp, mu); PokeIndex<LorentzIndex>(mat, tmp, mu);

View File

@ -249,8 +249,12 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
//////////////////////// ////////////////////////
// Call the single hop // Call the single hop
//////////////////////// ////////////////////////
auto U_v = U.View();
auto UUU_v = UUU.View();
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), { thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
Kernels::DhopDirK(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1); Kernels::DhopDirK(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
}); });
// Force in three link terms // Force in three link terms
@ -360,9 +364,12 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
Compressor compressor; Compressor compressor;
Stencil.HaloExchange(in, compressor); Stencil.HaloExchange(in, compressor);
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , { thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp); Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
}); });
}; };
@ -377,13 +384,17 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
Compressor compressor; Compressor compressor;
st.HaloExchange(in, compressor); st.HaloExchange(in, compressor);
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) { if (dag == DaggerYes) {
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), { thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
}); });
} else { } else {
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), { thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
}); });
} }
}; };

View File

@ -171,12 +171,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
Compressor compressor; Compressor compressor;
Stencil.HaloExchange(in,compressor); Stencil.HaloExchange(in,compressor);
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = in.View();
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{ thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sU=ss; int sU=ss;
int sF = s+Ls*sU; int sF = s+Ls*sU;
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp); Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
} }
}); });
}; };
@ -230,24 +233,26 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
Compressor compressor; Compressor compressor;
int LLs = in.Grid()->_rdimensions[0]; int LLs = in.Grid()->_rdimensions[0];
DhopTotalTime -= usecond(); DhopTotalTime -= usecond();
DhopCommTime -= usecond(); DhopCommTime -= usecond();
st.HaloExchange(in,compressor); st.HaloExchange(in,compressor);
DhopCommTime += usecond(); DhopCommTime += usecond();
DhopComputeTime -= usecond(); DhopComputeTime -= usecond();
auto U_v = U.View();
auto UUU_v = UUU.View();
auto out_v = out.View();
auto in_v = in.View();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion // Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) { if (dag == DaggerYes) {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), { thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
int sU=ss; int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out); Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
}); });
} else { } else {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{ thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
int sU=ss; int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
}); });
} }
DhopComputeTime += usecond(); DhopComputeTime += usecond();

View File

@ -35,16 +35,18 @@ See the full license in the file "LICENSE" in the top level distribution directo
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper) std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{ {
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
GridBase *grid = psi.Grid(); auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++; this->M5Dcalls++;
@ -76,16 +78,20 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &p
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper, std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs) std::vector<Coeff_t> &shift_coeffs)
{ {
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi.Grid();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++; this->M5Dcalls++;
@ -120,14 +126,17 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionFi
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper) std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{ {
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
GridBase *grid = psi.Grid(); auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++; this->M5Dcalls++;
@ -159,16 +168,19 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper, std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs) std::vector<Coeff_t> &shift_coeffs)
{ {
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi.Grid(); auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol // Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++; this->M5Dcalls++;
@ -204,14 +216,15 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi) void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
{ {
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
GridBase *grid = psi.Grid();
int Ls = this->Ls; int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
chi.Checkerboard() = psi.Checkerboard(); if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
this->MooeeInvCalls++; this->MooeeInvCalls++;
this->MooeeInvTime -= usecond(); this->MooeeInvTime -= usecond();
@ -251,12 +264,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &ch
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi) void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
{ {
GridBase *grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
chi.Checkerboard() = psi.Checkerboard();
this->MooeeInvCalls++; this->MooeeInvCalls++;
this->MooeeInvTime -= usecond(); this->MooeeInvTime -= usecond();
@ -306,14 +321,15 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionFie
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi) void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
{ {
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
GridBase *grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls; int Ls = this->Ls;
auto psi = psi_i.View();
chi.Checkerboard() = psi.Checkerboard(); auto chi = chi_i.View();
this->MooeeInvCalls++; this->MooeeInvCalls++;
this->MooeeInvTime -= usecond(); this->MooeeInvTime -= usecond();
@ -353,12 +369,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi) void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
{ {
GridBase *grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
chi.Checkerboard() = psi.Checkerboard();
this->MooeeInvCalls++; this->MooeeInvCalls++;
this->MooeeInvTime -= usecond(); this->MooeeInvTime -= usecond();

View File

@ -63,10 +63,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, Fermion
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi, void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper) std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{ {
GridBase* grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd(); const int nsimd = Simd::Nsimd();
@ -78,8 +82,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
assert(Ls/LLs == nsimd); assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0]; scalar_type* u_p = (scalar_type*) &u[0];
@ -208,11 +210,14 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi, void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi_i, const FermionField& phi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, FermionField& chi_i, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs) std::vector<Coeff_t>& shift_coeffs)
{ {
#if 0 #if 0
auto & psi = psi_i;
auto & phi = phi_i;
auto & chi = chi_i;
this->M5D(psi, phi, chi, lower, diag, upper); this->M5D(psi, phi, chi, lower, diag, upper);
@ -225,8 +230,11 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
} }
#else #else
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi.Grid(); GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd(); const int nsimd = Simd::Nsimd();
@ -239,7 +247,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
assert(Ls/LLs == nsimd); assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
@ -389,10 +396,14 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi, void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper) std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{ {
GridBase* grid = psi.Grid(); chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd(); int nsimd = Simd::Nsimd();
@ -404,7 +415,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
assert(Ls/LLs == nsimd); assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
@ -531,12 +541,14 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
} }
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi, void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs) std::vector<Coeff_t>& shift_coeffs)
{ {
#if 0 #if 0
auto & psi = psi_i;
auto & phi = phi_i;
auto & chi = chi_i;
this->M5Ddag(psi, phi, chi, lower, diag, upper); this->M5Ddag(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well? // FIXME: possible gain from vectorizing shift operation as well?
@ -548,8 +560,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
} }
#else #else
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi.Grid(); GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd(); int nsimd = Simd::Nsimd();
@ -562,7 +577,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
assert(Ls/LLs == nsimd); assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard()); assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun // just directly address via type pun
typedef typename Simd::scalar_type scalar_type; typedef typename Simd::scalar_type scalar_type;
@ -717,9 +731,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
#endif #endif
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi, void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm) int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{ {
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512 #ifndef AVX512
{ {
SiteHalfSpinor BcastP; SiteHalfSpinor BcastP;
@ -909,12 +925,12 @@ void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fermion
template<class Impl> template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{ {
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls; int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0]; int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs; int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard() = psi.Checkerboard();
Vector<iSinglet<Simd>> Matp; Vector<iSinglet<Simd>> Matp;
Vector<iSinglet<Simd>> Matm; Vector<iSinglet<Simd>> Matm;
Vector<iSinglet<Simd>>* _Matp; Vector<iSinglet<Simd>>* _Matp;

View File

@ -40,9 +40,9 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
//////////////////////////////////////////// ////////////////////////////////////////////
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF, SiteSpinor *buf, int sF,
int sU, const FermionField &in, SiteSpinor &out,int threeLink) { int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) {
const SiteSpinor *chi_p; const SiteSpinor *chi_p;
SiteSpinor chi; SiteSpinor chi;
SiteSpinor Uchi; SiteSpinor Uchi;
@ -183,9 +183,9 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
}; };
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU, SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out) { const FermionFieldView &in, FermionFieldView &out) {
SiteSpinor naik; SiteSpinor naik;
SiteSpinor naive; SiteSpinor naive;
int oneLink =0; int oneLink =0;
@ -221,9 +221,9 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
}; };
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
int oneLink =0; int oneLink =0;
int threeLink=1; int threeLink=1;
@ -258,8 +258,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
}; };
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF, void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int disp) int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
{ {
// Disp should be either +1,-1,+3,-3 // Disp should be either +1,-1,+3,-3
// What about "dag" ? // What about "dag" ?

View File

@ -47,29 +47,33 @@ public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base; typedef FermionOperator<Impl> Base;
typedef typename ViewMap<FermionField>::Type FermionFieldView;
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
public: public:
void DhopDirK(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf, void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp); int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf, void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink); int sF, int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink);
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf, void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink); int sF, int sU, const FermionFieldView &in, SiteSpinor&out,int threeLink);
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf, void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,SiteSpinor * buf,
int LLs, int sU, const FermionField &in, FermionField &out, int dag); int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf, void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int LLs, int sU, const FermionField &in, FermionField &out); int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf, void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf,
int LLs, int sU, const FermionField &in, FermionField &out); int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
public: public:

View File

@ -580,10 +580,10 @@ NAMESPACE_BEGIN(Grid);
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
DoubledGaugeField &UUU, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
}; };
@ -644,10 +644,10 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
// This is the single precision 5th direction vectorised kernel // This is the single precision 5th direction vectorised kernel
#include <simd/Intel512single.h> #include <simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
DoubledGaugeField &UUU, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
#ifdef AVX512 #ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t gauge0,gauge1,gauge2,gauge3;
@ -694,10 +694,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
#include <simd/Intel512double.h> #include <simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
DoubledGaugeField &UUU, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
#ifdef AVX512 #ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t gauge0,gauge1,gauge2,gauge3;
@ -775,10 +775,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
#include <simd/Intel512single.h> #include <simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
DoubledGaugeField &UUU, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
#ifdef AVX512 #ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t gauge0,gauge1,gauge2,gauge3;
@ -840,10 +840,10 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
#include <simd/Intel512double.h> #include <simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
DoubledGaugeField &UUU, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
#ifdef AVX512 #ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t gauge0,gauge1,gauge2,gauge3;
@ -905,10 +905,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \ #define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \ template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
DoubledGaugeField &UUU, \ DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, \ SiteSpinor *buf, int LLs, \
int sU, const FermionField &in, FermionField &out); int sU, const FermionFieldView &in, FermionFieldView &out);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD); KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF); KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);

View File

@ -89,9 +89,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out, int dag) int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
{ {
SiteSpinor naik; SiteSpinor naik;
SiteSpinor naive; SiteSpinor naive;
@ -110,9 +110,9 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, Do
} }
template <class Impl> template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF, SiteSpinor *buf, int sF,
int sU, const FermionField &in, SiteSpinor &out,int threeLink) int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink)
{ {
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -298,14 +298,14 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ #define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \ DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, \ SiteSpinor *buf, int LLs, \
int sU, const FermionField &in, FermionField &out, int dag); int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \ #define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \ template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, \
SiteSpinor *buf, int sF, \ SiteSpinor *buf, int sF, \
int sU, const FermionField &in, SiteSpinor &out,int threeLink) ; int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) ;
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD); DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF); DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD); DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);

View File

@ -216,8 +216,11 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
//////////////////////// ////////////////////////
// Call the single hop // Call the single hop
//////////////////////// ////////////////////////
auto U_v = U.View();
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{ thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{
Kernels::DhopDirK(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma); Kernels::DhopDirK(st, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma);
}); });
////////////////////////////////////////////////// //////////////////////////////////////////////////
@ -316,9 +319,11 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
Compressor compressor(dag); Compressor compressor(dag);
Stencil.HaloExchange(in, compressor); Stencil.HaloExchange(in, compressor);
auto in_v = in.View();
auto out_v = out.View();
auto Umu_v = Umu.View();
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{ thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{
Kernels::DhopDirK(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma); Kernels::DhopDirK(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma);
}); });
}; };
@ -333,13 +338,16 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
st.HaloExchange(in, compressor); st.HaloExchange(in, compressor);
int Opt = WilsonKernelsStatic::Opt; int Opt = WilsonKernelsStatic::Opt;
auto U_v = U.View();
auto in_v = in.View();
auto out_v= out.View();
if (dag == DaggerYes) { if (dag == DaggerYes) {
accelerator_loop( sss,in, { accelerator_loop( sss,in_v, {
Kernels::DhopSiteDag(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); Kernels::DhopSiteDag(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
}); });
} else { } else {
accelerator_loop( sss,in, { accelerator_loop( sss,in_v, {
Kernels::DhopSite(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); Kernels::DhopSite(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
}); });
} }
}; };
@ -367,15 +375,21 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
// Inefficient comms method but not performance critical. // Inefficient comms method but not performance critical.
tmp1 = Cshift(q_in_1, mu, 1); tmp1 = Cshift(q_in_1, mu, 1);
tmp2 = Cshift(q_in_2, mu, 1); tmp2 = Cshift(q_in_2, mu, 1);
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_in_1_v=q_in_1.View();
auto q_in_2_v=q_in_2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), { thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
Kernels::ContractConservedCurrentSiteFwd(tmp1[sU], Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
q_in_2[sU], q_in_2_v[sU],
q_out[sU], q_out_v[sU],
Umu, sU, mu); Umu_v, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sU], Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
tmp2[sU], tmp2_v[sU],
q_out[sU], q_out_v[sU],
Umu, sU, mu); Umu_v, sU, mu);
}); });
} }
@ -415,34 +429,40 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = ph*q_in; tmp = ph*q_in;
tmpBwd = Cshift(tmp, mu, -1); tmpBwd = Cshift(tmp, mu, -1);
auto coords_v = coords.View();
auto tmpFwd_v = tmpFwd.View();
auto tmpBwd_v = tmpBwd.View();
auto Umu_v = Umu.View();
auto q_out_v = q_out.View();
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), { thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
// Compute the sequential conserved current insertion only if our simd // Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need. // object contains a timeslice we need.
vInteger t_mask = ((coords[sU] >= tmin) && vInteger t_mask = ((coords_v[sU] >= tmin) &&
(coords[sU] <= tmax)); (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask); Integer timeSlices = Reduce(t_mask);
if (timeSlices > 0) { if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sU], Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
q_out[sU], q_out_v[sU],
Umu, sU, mu, t_mask); Umu_v, sU, mu, t_mask);
} }
// Repeat for backward direction. // Repeat for backward direction.
t_mask = ((coords[sU] >= (tmin + tshift)) && t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
(coords[sU] <= (tmax + tshift))); (coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0; unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 )); if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
timeSlices = Reduce(t_mask); timeSlices = Reduce(t_mask);
if (timeSlices > 0) { if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sU], Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
q_out[sU], q_out_v[sU],
Umu, sU, mu, t_mask); Umu_v, sU, mu, t_mask);
} }
}); });
} }

View File

@ -244,11 +244,14 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
assert(dirdisp<=7); assert(dirdisp<=7);
assert(dirdisp>=0); assert(dirdisp>=0);
auto Umu_v = Umu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{ thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sU=ss; int sU=ss;
int sF = s+Ls*sU; int sF = s+Ls*sU;
Kernels::DhopDirK(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma); Kernels::DhopDirK(Stencil,Umu_v,Stencil.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
} }
}); });
}; };
@ -279,7 +282,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
Atilde=A; Atilde=A;
int LLs = B.Grid()->_rdimensions[0]; int LLs = B.Grid()->_rdimensions[0];
DerivComputeTime-=usecond(); DerivComputeTime-=usecond();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
@ -293,15 +295,20 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
//////////////////////// ////////////////////////
DerivDhopComputeTime -= usecond(); DerivDhopComputeTime -= usecond();
auto U_v = U.View();
auto Btilde_v = Btilde.View();
auto B_v = B.View();
int Bsites = B.Grid()->oSites();
int Usites = U.Grid()->oSites();
thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{ thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
int sU = sss; int sU = sss;
int sF = s + Ls * sU; int sF = s + Ls * sU;
assert(sF < B.Grid()->oSites()); assert(sF < Bsites);
assert(sU < U.Grid()->oSites()); assert(sU < Usites);
Kernels::DhopDirK(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma);
//////////////////////////// ////////////////////////////
// spin trace outer product // spin trace outer product
@ -406,6 +413,9 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
// Ugly explicit thread mapping introduced for OPA reasons. // Ugly explicit thread mapping introduced for OPA reasons.
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
#pragma omp parallel reduction(max:ctime) reduction(max:ptime) #pragma omp parallel reduction(max:ctime) reduction(max:ptime)
{ {
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
@ -435,13 +445,13 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
for (int ss = myblock; ss < myblock+myn; ++ss) { for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
} }
} else { } else {
for (int ss = myblock; ss < myblock+myn; ++ss) { for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
} }
} }
ptime = usecond() - start; ptime = usecond() - start;
@ -470,14 +480,14 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
thread_loop( (int ss = 0; ss < sz; ss++) ,{ thread_loop( (int ss = 0; ss < sz; ss++) ,{
int sU = st.surface_list[ss]; int sU = st.surface_list[ss];
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
}); });
} else { } else {
int sz=st.surface_list.size(); int sz=st.surface_list.size();
thread_loop( (int ss = 0; ss < sz; ss++) ,{ thread_loop( (int ss = 0; ss < sz; ss++) ,{
int sU = st.surface_list[ss]; int sU = st.surface_list[ss];
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
}); });
} }
DhopComputeTime2+=usecond(); DhopComputeTime2+=usecond();
@ -505,17 +515,20 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
// Dhop takes the 4d grid from U, and makes a 5d index for fermion // Dhop takes the 4d grid from U, and makes a 5d index for fermion
int Opt = WilsonKernelsStatic::Opt; int Opt = WilsonKernelsStatic::Opt;
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) { if (dag == DaggerYes) {
accelerator_loop( ss, U, { accelerator_loop( ss, U_v, {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
}); });
} else { } else {
accelerator_loop( ss, U , { accelerator_loop( ss, U_v , {
int sU = ss; int sU = ss;
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
}); });
} }
DhopComputeTime+=usecond(); DhopComputeTime+=usecond();
@ -738,10 +751,17 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
unsigned int LLs = q_in_1.Grid()->_rdimensions[0]; unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
q_out = Zero(); q_out = Zero();
auto q_in_1_v = q_in_1.View();
auto q_in_2_v = q_in_2.View();
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s), // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one. // q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
tmp1 = Cshift(q_in_1, mu + 1, 1); tmp1 = Cshift(q_in_1, mu + 1, 1);
tmp2 = Cshift(q_in_2, mu + 1, 1); tmp2 = Cshift(q_in_2, mu + 1, 1);
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), { thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
unsigned int sF1 = sU * LLs; unsigned int sF1 = sU * LLs;
unsigned int sF2 = (sU + 1) * LLs - 1; unsigned int sF2 = (sU + 1) * LLs - 1;
@ -755,20 +775,20 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
// If vectorised in 5th dimension, reverse q2 vector to match up // If vectorised in 5th dimension, reverse q2 vector to match up
// sites correctly. // sites correctly.
if (Impl::LsVectorised) { if (Impl::LsVectorised) {
REVERSE_LS(q_in_2[sF2], qSite2, Ls / LLs); REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2[sF2], qmuSite2, Ls / LLs); REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
} else { } else {
qSite2 = q_in_2[sF2]; qSite2 = q_in_2_v[sF2];
qmuSite2 = tmp2[sF2]; qmuSite2 = tmp2_v[sF2];
} }
Kernels::ContractConservedCurrentSiteFwd(tmp1[sF1], Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
qSite2, qSite2,
q_out[sU], q_out_v[sU],
Umu, sU, mu, axial_sign); Umu_v, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sF1], Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
qmuSite2, qmuSite2,
q_out[sU], q_out_v[sU],
Umu, sU, mu, axial_sign); Umu_v, sU, mu, axial_sign);
sF1++; sF1++;
sF2--; sF2--;
} }
@ -808,7 +828,7 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
q_out = Zero(); q_out = Zero();
LatticeInteger coords(_FourDimGrid); LatticeInteger coords(_FourDimGrid);
LatticeCoordinate(coords, Tp); LatticeCoordinate(coords, Tp);
auto coords_v = coords.View();
// Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu // Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu
// by one. // by one.
tmp = Cshift(q_in, mu + 1, 1); tmp = Cshift(q_in, mu + 1, 1);
@ -816,11 +836,15 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = ph*q_in; tmp = ph*q_in;
tmpBwd = Cshift(tmp, mu + 1, -1); tmpBwd = Cshift(tmp, mu + 1, -1);
auto tmpBwd_v = tmpBwd.View();
auto tmpFwd_v = tmpFwd.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{ thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{
// Compute the sequential conserved current insertion only if our simd // Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need. // object contains a timeslice we need.
vInteger t_mask = ((coords[sU] >= tmin) && vInteger t_mask = ((coords_v[sU] >= tmin) &&
(coords[sU] <= tmax)); (coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask); Integer timeSlices = Reduce(t_mask);
if (timeSlices > 0) { if (timeSlices > 0) {
@ -828,20 +852,20 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int sF = sU * LLs; unsigned int sF = sU * LLs;
for (unsigned int s = 0; s < LLs; ++s) { for (unsigned int s = 0; s < LLs; ++s) {
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sF], Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sF],
q_out[sF], Umu, sU, q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign); mu, t_mask, axial_sign);
++sF; ++sF;
} }
} }
// Repeat for backward direction. // Repeat for backward direction.
t_mask = ((coords[sU] >= (tmin + tshift)) && t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
(coords[sU] <= (tmax + tshift))); (coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0; unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 )); if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
timeSlices = Reduce(t_mask); timeSlices = Reduce(t_mask);
@ -849,8 +873,8 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int sF = sU * LLs; unsigned int sF = sU * LLs;
for (unsigned int s = 0; s < LLs; ++s) { for (unsigned int s = 0; s < LLs; ++s) {
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sF], Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sF],
q_out[sF], Umu, sU, q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign); mu, t_mask, axial_sign);
++sF; ++sF;
} }

View File

@ -36,7 +36,7 @@ int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute; int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
// Constructor: passes the implementation parameters `p` straight to the Base
// initialiser; the constructor body itself is empty.
// NOTE(review): this row is a side-by-side diff rendering; the first copy carries the
// `accelerator` qualifier and the second copy does not.
template <class Impl> template <class Impl>
accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){}; WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
//////////////////////////////////////////// ////////////////////////////////////////////
// Generic implementation; move to different file? // Generic implementation; move to different file?
@ -103,9 +103,9 @@ accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
// All legs kernels ; comms then compute // All legs kernels ; comms then compute
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
@ -127,9 +127,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, Lebesg
}; };
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
@ -153,9 +153,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueO
// Interior kernels // Interior kernels
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
@ -178,9 +178,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, Leb
}; };
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
@ -204,9 +204,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, Lebesg
// Exterior kernels // Exterior kernels
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
// SiteHalfSpinor tmp; // SiteHalfSpinor tmp;
// SiteHalfSpinor chi; // SiteHalfSpinor chi;
@ -231,9 +231,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, Leb
}; };
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out) int sU, const FermionFieldView &in, FermionFieldView &out)
{ {
// SiteHalfSpinor tmp; // SiteHalfSpinor tmp;
// SiteHalfSpinor chi; // SiteHalfSpinor chi;
@ -258,9 +258,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, Lebesg
}; };
template <class Impl> template <class Impl>
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF, accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) { int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
SiteSpinor result; SiteSpinor result;
@ -300,21 +300,21 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
const SitePropagator &q_in_1, const SitePropagator &q_in_1,
const SitePropagator &q_in_2, const SitePropagator &q_in_2,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
bool switch_sign) bool switch_sign)
{ {
SitePropagator result, tmp; SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu); Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign)
{ if (switch_sign) {
q_out -= result; q_out -= result;
} } else {
else
{
q_out += result; q_out += result;
} }
} }
@ -330,21 +330,20 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
const SitePropagator &q_in_1, const SitePropagator &q_in_1,
const SitePropagator &q_in_2, const SitePropagator &q_in_2,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
bool switch_sign) bool switch_sign)
{ {
SitePropagator result, tmp; SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5); Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd); Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign) if (switch_sign) {
{
q_out += result; q_out += result;
} } else {
else
{
q_out -= result; q_out -= result;
} }
} }
@ -356,7 +355,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
const SitePropagator &q_in_1, \ const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \ const SitePropagator &q_in_2, \
SitePropagator &q_out, \ SitePropagator &q_out, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
unsigned int sU, \ unsigned int sU, \
unsigned int mu, \ unsigned int mu, \
bool switch_sign) \ bool switch_sign) \
@ -368,7 +367,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
const SitePropagator &q_in_1, \ const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \ const SitePropagator &q_in_2, \
SitePropagator &q_out, \ SitePropagator &q_out, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
unsigned int mu, \ unsigned int mu, \
unsigned int sU, \ unsigned int sU, \
bool switch_sign) \ bool switch_sign) \
@ -391,25 +390,23 @@ NO_CURR_SITE(GparityWilsonImplDF);
// SeqConservedCurrentSiteFwd: forward-direction, single-site contribution used by the
// sequential conserved current insertion.
//   q_in        - input site propagator (read only)
//   q_out       - site propagator that the result is accumulated into
//   U, sU       - gauge field (view) and the site index used to read U[sU]
//   mu          - direction passed to multLinkProp and WilsonCurrentFwd
//   t_mask      - SIMD mask; lanes where it is false are replaced by 0.*result
//   switch_sign - when true the result is subtracted from q_out, otherwise added
// Steps: result = multLinkProp(U[sU], q_in, mu); result = WilsonCurrentFwd(result, mu);
// mask with predicatedWhere; accumulate into q_out.
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
vInteger t_mask, vInteger t_mask,
bool switch_sign) bool switch_sign)
{ {
SitePropagator result; SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu); Impl::multLinkProp(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu); result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries. // Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result); result = predicatedWhere(t_mask, result, 0.*result);
// Sign convention for the forward term: switch_sign selects subtraction.
if (switch_sign) if (switch_sign) {
{
q_out -= result; q_out -= result;
} } else {
else
{
q_out += result; q_out += result;
} }
} }
@ -423,7 +420,7 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
vInteger t_mask, vInteger t_mask,
@ -436,12 +433,9 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
// Zero any unwanted timeslice entries. // Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result); result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) if (switch_sign) {
{
q_out += result; q_out += result;
} } else {
else
{
q_out -= result; q_out -= result;
} }
} }

View File

@ -51,12 +51,15 @@ public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base; typedef FermionOperator<Impl> Base;
// View types for the fermion field and the doubled gauge field, as mapped by ViewMap.
// The kernel signatures in this class take these view types for their U / in / out arguments.
using FermionFieldView = typename ViewMap<FermionField>::Type;
using DoubledGaugeFieldView = typename ViewMap<DoubledGaugeField>::Type;
public: public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{ {
bgq_l1p_optimisation(1); bgq_l1p_optimisation(1);
switch(Opt) { switch(Opt) {
@ -99,8 +102,8 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) { int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) {
// no kernel choice // no kernel choice
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
@ -116,8 +119,8 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{ {
bgq_l1p_optimisation(1); bgq_l1p_optimisation(1);
switch(Opt) { switch(Opt) {
@ -161,8 +164,8 @@ public:
template <bool EnableBool = true> accelerator template <bool EnableBool = true> accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) {
for (int site = 0; site < Nsite; site++) { for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) { for (int s = 0; s < Ls; s++) {
@ -176,8 +179,8 @@ public:
} }
} }
// DhopDirK: kernel taking the stencil, gauge field (view), comms buffer, fermion site
// index sF, gauge site index sU, input/output fermion fields (views) and a
// direction/gamma pair. Presumably applies a single stencil direction - the body is
// not fully visible here, TODO confirm.
// NOTE(review): the out-of-class definition names the direction parameter `dir`,
// whereas this declaration calls it `dirdisp`.
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, accelerator void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// Utilities for inserting Wilson conserved current. // Utilities for inserting Wilson conserved current.
@ -185,27 +188,27 @@ public:
// Forward-direction site contraction for the conserved current.
// The definition computes g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(multLinkProp(U[sU], q_in_1, mu), mu)
// and accumulates it into q_out: subtracted when switch_sign is true, added otherwise.
// switch_sign defaults to false.
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2, const SitePropagator &q_in_2,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
bool switch_sign = false); bool switch_sign = false);
// Backward-direction site contraction for the conserved current.
// The definition uses multLinkProp with direction index mu + Nd and WilsonCurrentBwd,
// and accumulates into q_out with the opposite sign convention to the forward variant:
// added when switch_sign is true, subtracted otherwise. switch_sign defaults to false.
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2, const SitePropagator &q_in_2,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
bool switch_sign = false); bool switch_sign = false);
// Forward-direction site term of the sequential conserved current.
// t_mask is a SIMD mask: the definition zeroes unselected entries via predicatedWhere
// before accumulating into q_out (subtracted when switch_sign is true, added otherwise).
// switch_sign defaults to false.
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
vInteger t_mask, vInteger t_mask,
bool switch_sign = false); bool switch_sign = false);
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out, SitePropagator &q_out,
DoubledGaugeField &U, DoubledGaugeFieldView &U,
unsigned int sU, unsigned int sU,
unsigned int mu, unsigned int mu,
vInteger t_mask, vInteger t_mask,
@ -213,60 +216,60 @@ public:
private: private:
// Specialised variants // Specialised variants
// Generic (portable) per-site kernels. All six share one parameter list: stencil,
// Lebesgue order, gauge field (view), comms buffer, fermion site index sF, gauge site
// index sU, and input/output fermion fields (views).
//   GenericDhopSite / GenericDhopSiteDag       - "all legs" kernels (comms then compute)
//   GenericDhopSiteInt / GenericDhopSiteDagInt - interior kernels
//   GenericDhopSiteExt / GenericDhopSiteDagExt - exterior kernels
// The `Dag` suffix presumably denotes the daggered operator - TODO confirm.
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
// Assembler kernel entry points. Unlike the Generic variants these also take Ls and
// Nsite. The default (non-specialised) definitions simply assert(0), i.e. there is no
// assembler implementation unless one is provided for a given Impl.
// Int / Ext / Dag suffixes follow the same naming as the Generic kernels.
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out); int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
// "Hand" kernel entry points; same parameter list as the Generic kernels (no Ls/Nsite).
// NOTE(review): presumably hand-unrolled implementations - their definitions are not
// visible in this part of the file, confirm before relying on this.
// Int / Ext / Dag suffixes follow the same naming as the Generic kernels.
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
public: public:

View File

@ -38,43 +38,43 @@ NAMESPACE_BEGIN(Grid);
// Default to no assembler implementation // Default to no assembler implementation
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Default AsmDhopSite: there is no assembler implementation in the generic template,
// so the body is just assert(0) and none of the arguments are used.
template<class Impl> void template<class Impl> void
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
} }
// Default AsmDhopSiteDag: there is no assembler implementation in the generic template,
// so the body is just assert(0) and none of the arguments are used.
template<class Impl> void template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
} }
// Default AsmDhopSiteInt (interior variant): there is no assembler implementation in the
// generic template, so the body is just assert(0) and none of the arguments are used.
template<class Impl> void template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
} }
// Default AsmDhopSiteDagInt (interior variant): there is no assembler implementation in the
// generic template, so the body is just assert(0) and none of the arguments are used.
template<class Impl> void template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
} }
// Default AsmDhopSiteExt (exterior variant): there is no assembler implementation in the
// generic template, so the body is just assert(0) and none of the arguments are used.
template<class Impl> void template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
} }
// Primary-template definition of WilsonKernels<Impl>::AsmDhopSiteDagExt.
// The body is only assert(0): no work is done here, and reaching this
// function trips the assertion when assertions are enabled.
// Each line shows the old text followed by the new text of the change; the
// two signatures differ only in DoubledGaugeField -> DoubledGaugeFieldView
// and FermionField -> FermionFieldView.
template<class Impl> void template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{ {
assert(0); assert(0);
} }
@ -83,21 +83,21 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h> #include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
// INSTANTIATE_ASM(A): explicitly instantiates the six WilsonKernels<A> Asm
// member functions (AsmDhopSite, AsmDhopSiteDag, AsmDhopSiteInt,
// AsmDhopSiteDagInt, AsmDhopSiteExt, AsmDhopSiteDagExt). It is invoked
// below for WilsonImplF and WilsonImplD.
// Each line shows the old text followed by the new text of the change; the
// instantiated signatures differ only in taking DoubledGaugeFieldView and
// FermionFieldView instead of DoubledGaugeField and FermionField.
// NOTE(review): the last macro line ends in a continuation backslash and no
// blank line is visible before the first INSTANTIATE_ASM(...) use in this
// rendering; presumably a blank line was dropped. Confirm against the real
// source file.
#define INSTANTIATE_ASM(A)\ #define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
\ \
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
\ \
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
\ \
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
INSTANTIATE_ASM(WilsonImplF); INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(WilsonImplD); INSTANTIATE_ASM(WilsonImplD);

View File

@ -573,8 +573,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class Impl> void template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
@ -600,8 +600,8 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -626,8 +626,8 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
} }
template<class Impl> void template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
@ -654,8 +654,8 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -681,8 +681,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
} }
template<class Impl> void template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
@ -711,8 +711,8 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
@ -746,58 +746,58 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \ WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
LebesgueOrder &lo, \ LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \ SiteHalfSpinor *buf, \
int sF,int sU, \ int sF,int sU, \
const FermionField &in, \ const FermionFieldView &in, \
FermionField &out){ assert(0); } \ FermionFieldView &out){ assert(0); } \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \ WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
LebesgueOrder &lo, \ LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \ SiteHalfSpinor *buf, \
int sF,int sU, \ int sF,int sU, \
const FermionField &in, \ const FermionFieldView &in, \
FermionField &out){ assert(0); } \ FermionFieldView &out){ assert(0); } \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \ WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
LebesgueOrder &lo, \ LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \ SiteHalfSpinor *buf, \
int sF,int sU, \ int sF,int sU, \
const FermionField &in, \ const FermionFieldView &in, \
FermionField &out){ assert(0); } \ FermionFieldView &out){ assert(0); } \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \ WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
LebesgueOrder &lo, \ LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \ SiteHalfSpinor *buf, \
int sF,int sU, \ int sF,int sU, \
const FermionField &in, \ const FermionFieldView &in, \
FermionField &out){ assert(0); } \ FermionFieldView &out){ assert(0); } \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \ WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
LebesgueOrder &lo, \ LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \ SiteHalfSpinor *buf, \
int sF,int sU, \ int sF,int sU, \
const FermionField &in, \ const FermionFieldView &in, \
FermionField &out){ assert(0); } \ FermionFieldView &out){ assert(0); } \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \ WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
LebesgueOrder &lo, \ LebesgueOrder &lo, \
DoubledGaugeField &U, \ DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \ SiteHalfSpinor *buf, \
int sF,int sU, \ int sF,int sU, \
const FermionField &in, \ const FermionFieldView &in, \
FermionField &out){ assert(0); } \ FermionFieldView &out){ assert(0); } \
#define HAND_SPECIALISE_GPARITY(IMPL) \ #define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
typedef IMPL Impl; \ typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \ typedef typename Simd::scalar_type S; \
@ -812,8 +812,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \ } \
\ \
template<> \ template<> \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
typedef IMPL Impl; \ typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \ typedef typename Simd::scalar_type S; \
@ -828,8 +828,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \ } \
\ \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
typedef IMPL Impl; \ typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \ typedef typename Simd::scalar_type S; \
@ -844,8 +844,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \ } \
\ \
template<> \ template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
typedef IMPL Impl; \ typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \ typedef typename Simd::scalar_type S; \
@ -860,8 +860,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \ } \
\ \
template<> void \ template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
typedef IMPL Impl; \ typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \ typedef typename Simd::scalar_type S; \
@ -877,8 +877,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \ } \
template<> \ template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
typedef IMPL Impl; \ typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \ typedef typename Simd::scalar_type S; \
@ -904,18 +904,18 @@ HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation ///////////////////// ////////////// Wilson ; uses this implementation /////////////////////
// INSTANTIATE_THEM(A): explicitly instantiates the six WilsonKernels<A> Hand
// member functions (HandDhopSite, HandDhopSiteDag, HandDhopSiteInt,
// HandDhopSiteDagInt, HandDhopSiteExt, HandDhopSiteDagExt). It is invoked
// below for WilsonImplF and WilsonImplD.
// Each line shows the old text followed by the new text of the change; the
// instantiated signatures differ only in taking DoubledGaugeFieldView and
// FermionFieldView instead of DoubledGaugeField and FermionField.
#define INSTANTIATE_THEM(A) \ #define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\ int ss,int sU,const FermionFieldView &in, FermionFieldView &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
INSTANTIATE_THEM(WilsonImplF); INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD); INSTANTIATE_THEM(WilsonImplD);