1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

View introduction to prepare for accelerator offload.

Probably same problem exists for stencil object
This commit is contained in:
paboyle 2018-03-04 16:38:08 +00:00
parent 442b0b406c
commit 3277bda130
19 changed files with 595 additions and 457 deletions

View File

@ -38,17 +38,20 @@ NAMESPACE_BEGIN(Grid);
// Pminus fowards
// Pplus backwards..
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls =this->Ls;
GridBase *grid=psi.Grid();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
M5Dtime-=usecond();
@ -81,17 +84,20 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls =this->Ls;
GridBase *grid=psi.Grid();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
M5Dcalls++;
@ -125,12 +131,14 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
{
GridBase *grid=psi.Grid();
int Ls=this->Ls;
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
MooeeInvCalls++;
MooeeInvTime-=usecond();
@ -170,13 +178,15 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
{
GridBase *grid=psi.Grid();
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
int Ls=this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(psi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls);

View File

@ -51,14 +51,18 @@ void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi.Grid();
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd= Simd::Nsimd();
@ -70,8 +74,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
@ -124,7 +126,7 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
}
#else
for(int v=0;v<LLs;v++){
vprefetch(psi[ss+v+LLs]);
int vp= (v==LLs-1) ? 0 : v+1;
@ -195,14 +197,18 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
}
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
const FermionField &phi_i,
FermionField &chi_i,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper)
{
GridBase *grid=psi.Grid();
chi_i.Checkerboard()=psi_i.Checkerboard();
GridBase *grid=psi_i.Grid();
auto psi=psi_i.View();
auto phi=phi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd= Simd::Nsimd();
@ -214,8 +220,6 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
assert(Ls/LLs==nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type * u_p = (scalar_type *)&u[0];
@ -339,11 +343,13 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
#endif
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi,
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site,
Vector<iSinglet<Simd> > &Matp,
Vector<iSinglet<Simd> > &Matm)
{
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512
{
SiteHalfSpinor BcastP;
@ -513,11 +519,14 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
// Z-mobius version
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi,
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i,
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
{
#ifndef AVX512
{
auto psi = psi_i.View();
auto chi = chi_i.View();
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
SiteHalfSpinor SiteChiP;
@ -761,11 +770,12 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
chi.Checkerboard()=psi.Checkerboard();
int Ls=this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard()=psi.Checkerboard();
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;

View File

@ -40,18 +40,20 @@ NAMESPACE_BEGIN(Grid);
// Pminus fowards
// Pplus backwards..
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
int Ls = this->Ls;
GridBase* grid = psi.Grid();
GridBase* grid = psi_i.Grid();
auto phi = phi_i.View();
auto psi = psi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
this->M5Dtime -= usecond();
thread_loop( (int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
for(int s=0; s<Ls; s++){
auto tmp = psi[0];
@ -78,13 +80,17 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
int Ls = this->Ls;
GridBase* grid = psi.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard()=psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
@ -116,16 +122,16 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
{
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi=psi_i.View();
auto chi=chi_i.View();
int Ls = this->Ls;
chi.Checkerboard() = psi.Checkerboard();
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
thread_loop((int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
auto tmp1 = psi[0];
@ -164,13 +170,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
{
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
assert(psi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
std::vector<Coeff_t> ueec(Ls);
std::vector<Coeff_t> deec(Ls+1);

View File

@ -52,10 +52,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
@ -67,8 +72,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
@ -197,10 +200,15 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
}
template<class Impl>
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
@ -212,8 +220,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
@ -342,9 +348,12 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
#endif
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512
{
SiteHalfSpinor BcastP;
@ -532,12 +541,11 @@ void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fer
template<class Impl>
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard() = psi.Checkerboard();
Vector<iSinglet<Simd> > Matp;
Vector<iSinglet<Simd> > Matm;
Vector<iSinglet<Simd> > *_Matp;

View File

@ -149,8 +149,8 @@ public:
typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t; \
typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base)
@ -267,12 +267,14 @@ public:
int Ls=Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
auto tmp_v = tmp.View();
auto Btilde_v = Btilde.View();
auto Atilde_v = Atilde.View();
thread_loop( (int sss=0;sss<tmp.Grid()->oSites();sss++),{
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
tmp_v[sU] = tmp_v[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here
}
});
PokeIndex<LorentzIndex>(mat,tmp,mu);
@ -499,13 +501,10 @@ public:
const int Nsimd =vector_type::Nsimd();
// const int Nsimd = grid->Nsimd();
GridBase *grid= St.Grid();
int direction = St._directions[mu];
int distance = St._distances[mu];
int ptype = St._permute_type[mu];
int sl = grid->_simd_layout[direction];
int sl = St._simd_layout[direction];
// Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd;
@ -524,7 +523,7 @@ public:
extract(chi,vals);
for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s);
St.iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1));
@ -592,9 +591,13 @@ public:
Uconj = where(coor==neglink,-Uconj,Uconj);
}
thread_loop( (auto ss=U.begin();ss<U.end();ss++),{
Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss]();
auto U_v = U.View();
auto Uds_v = Uds.View();
auto Uconj_v = Uconj.View();
auto Utmp_v= Utmp.View();
thread_loop( (auto ss=U_v.begin();ss<U_v.end();ss++),{
Uds_v[ss](0)(mu) = U_v[ss]();
Uds_v[ss](1)(mu) = Uconj_v[ss]();
});
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
@ -605,9 +608,8 @@ public:
Utmp = where(coor==0,Uconj,Utmp);
}
thread_loop((auto ss=U.begin();ss<U.end();ss++),{
Uds[ss](0)(mu+4) = Utmp[ss]();
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
});
Utmp = Uconj;
@ -615,8 +617,8 @@ public:
Utmp = where(coor==0,U,Utmp);
}
thread_loop((auto ss=U.begin();ss<U.end();ss++),{
Uds[ss](1)(mu+4) = Utmp[ss]();
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
});
}
@ -628,8 +630,10 @@ public:
GaugeLinkField link(mat.Grid());
// use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
thread_loop((auto ss = tmp.begin(); ss < tmp.end(); ss++), {
link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
auto link_v = link.View();
auto tmp_v = tmp.View();
thread_loop((auto ss = tmp_v.begin(); ss < tmp_v.end(); ss++), {
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
});
PokeIndex<LorentzIndex>(mat, link, mu);
return;
@ -641,11 +645,14 @@ public:
GaugeLinkField tmp(mat.Grid());
tmp = Zero();
auto tmp_v = tmp.View();
auto Atilde_v = Atilde.View();
auto Btilde_v = Btilde.View();
thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
}
});
PokeIndex<LorentzIndex>(mat, tmp, mu);

View File

@ -249,8 +249,12 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
////////////////////////
// Call the single hop
////////////////////////
auto U_v = U.View();
auto UUU_v = UUU.View();
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
Kernels::DhopDirK(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
Kernels::DhopDirK(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
});
// Force in three link terms
@ -360,9 +364,12 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
Compressor compressor;
Stencil.HaloExchange(in, compressor);
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
});
};
@ -377,13 +384,17 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
Compressor compressor;
st.HaloExchange(in, compressor);
auto U_v = U.View();
auto UUU_v = UUU.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
});
} else {
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
});
}
};

View File

@ -171,12 +171,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
Compressor compressor;
Stencil.HaloExchange(in,compressor);
auto Umu_v = Umu.View();
auto UUUmu_v = UUUmu.View();
auto in_v = in.View();
auto out_v = in.View();
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
}
});
};
@ -230,24 +233,26 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
Compressor compressor;
int LLs = in.Grid()->_rdimensions[0];
DhopTotalTime -= usecond();
DhopCommTime -= usecond();
st.HaloExchange(in,compressor);
DhopCommTime += usecond();
DhopComputeTime -= usecond();
auto U_v = U.View();
auto UUU_v = UUU.View();
auto out_v = out.View();
auto in_v = in.View();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
int sU=ss;
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
});
} else {
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
int sU=ss;
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
});
}
DhopComputeTime += usecond();

View File

@ -35,16 +35,18 @@ See the full license in the file "LICENSE" in the top level distribution directo
NAMESPACE_BEGIN(Grid);
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
GridBase *grid = psi.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
@ -76,16 +78,20 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &p
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi.Grid();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
@ -120,14 +126,17 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionFi
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
GridBase *grid = psi.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
@ -159,16 +168,19 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
std::vector<Coeff_t> &shift_coeffs)
{
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
GridBase *grid = psi.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// Flops = 6.0*(Nc*Ns) *Ls*vol
this->M5Dcalls++;
@ -204,14 +216,15 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
{
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
GridBase *grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
chi.Checkerboard() = psi.Checkerboard();
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
@ -251,12 +264,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &ch
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
{
GridBase *grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
auto psi = psi_i.View();
auto chi = chi_i.View();
chi.Checkerboard() = psi.Checkerboard();
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
@ -306,14 +321,15 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionFie
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
{
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
GridBase *grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
int Ls = this->Ls;
chi.Checkerboard() = psi.Checkerboard();
auto psi = psi_i.View();
auto chi = chi_i.View();
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();
@ -353,12 +369,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField
}
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
{
GridBase *grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase *grid = psi_i.Grid();
auto psi = psi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
chi.Checkerboard() = psi.Checkerboard();
this->MooeeInvCalls++;
this->MooeeInvTime -= usecond();

View File

@ -63,10 +63,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, Fermion
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
@ -78,8 +82,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
scalar_type* u_p = (scalar_type*) &u[0];
@ -208,11 +210,14 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi_i, const FermionField& phi_i,
FermionField& chi_i, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
auto & psi = psi_i;
auto & phi = phi_i;
auto & chi = chi_i;
this->M5D(psi, phi, chi, lower, diag, upper);
@ -225,8 +230,11 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
}
#else
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
const int nsimd = Simd::Nsimd();
@ -239,7 +247,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
@ -389,10 +396,14 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
{
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
@ -404,7 +415,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
@ -531,12 +541,14 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
}
template<class Impl>
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
std::vector<Coeff_t>& shift_coeffs)
{
#if 0
auto & psi = psi_i;
auto & phi = phi_i;
auto & chi = chi_i;
this->M5Ddag(psi, phi, chi, lower, diag, upper);
// FIXME: possible gain from vectorizing shift operation as well?
@ -548,8 +560,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
}
#else
GridBase* grid = psi.Grid();
chi_i.Checkerboard() = psi_i.Checkerboard();
GridBase* grid = psi_i.Grid();
auto psi = psi_i.View();
auto phi = phi_i.View();
auto chi = chi_i.View();
int Ls = this->Ls;
int LLs = grid->_rdimensions[0];
int nsimd = Simd::Nsimd();
@ -562,7 +577,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
assert(Ls/LLs == nsimd);
assert(phi.Checkerboard() == psi.Checkerboard());
chi.Checkerboard() = psi.Checkerboard();
// just directly address via type pun
typedef typename Simd::scalar_type scalar_type;
@ -717,9 +731,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
#endif
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
{
auto psi = psi_i.View();
auto chi = chi_i.View();
#ifndef AVX512
{
SiteHalfSpinor BcastP;
@ -909,12 +925,12 @@ void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fermion
template<class Impl>
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
{
chi.Checkerboard() = psi.Checkerboard();
int Ls = this->Ls;
int LLs = psi.Grid()->_rdimensions[0];
int vol = psi.Grid()->oSites()/LLs;
chi.Checkerboard() = psi.Checkerboard();
Vector<iSinglet<Simd>> Matp;
Vector<iSinglet<Simd>> Matm;
Vector<iSinglet<Simd>>* _Matp;

View File

@ -40,9 +40,9 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) {
const SiteSpinor *chi_p;
SiteSpinor chi;
SiteSpinor Uchi;
@ -183,9 +183,9 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs, int sU,
const FermionField &in, FermionField &out) {
const FermionFieldView &in, FermionFieldView &out) {
SiteSpinor naik;
SiteSpinor naive;
int oneLink =0;
@ -221,9 +221,9 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
};
template <class Impl>
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
int oneLink =0;
int threeLink=1;
@ -258,8 +258,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
};
template <class Impl>
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int disp)
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
{
// Disp should be either +1,-1,+3,-3
// What about "dag" ?

View File

@ -46,30 +46,34 @@ public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
typedef typename ViewMap<FermionField>::Type FermionFieldView;
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
public:
void DhopDirK(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink);
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, SiteSpinor&out,int threeLink);
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
int LLs, int sU, const FermionField &in, FermionField &out, int dag);
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,SiteSpinor * buf,
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
int LLs, int sU, const FermionField &in, FermionField &out);
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
int LLs, int sU, const FermionField &in, FermionField &out);
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf,
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
public:

View File

@ -580,10 +580,10 @@ NAMESPACE_BEGIN(Grid);
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
};
@ -644,10 +644,10 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
// This is the single precision 5th direction vectorised kernel
#include <simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
@ -694,10 +694,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
#include <simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
@ -775,10 +775,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
#include <simd/Intel512single.h>
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
@ -840,10 +840,10 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
#include <simd/Intel512double.h>
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
DoubledGaugeField &UUU,
DoubledGaugeFieldView &U,
DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
#ifdef AVX512
uint64_t gauge0,gauge1,gauge2,gauge3;
@ -905,10 +905,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U, \
DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, \
int sU, const FermionField &in, FermionField &out);
int sU, const FermionFieldView &in, FermionFieldView &out);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);

View File

@ -89,9 +89,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int LLs,
int sU, const FermionField &in, FermionField &out, int dag)
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
{
SiteSpinor naik;
SiteSpinor naive;
@ -110,9 +110,9 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, Do
}
template <class Impl>
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteSpinor *buf, int sF,
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -298,14 +298,14 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
DoubledGaugeField &U,DoubledGaugeField &UUU, \
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
SiteSpinor *buf, int LLs, \
int sU, const FermionField &in, FermionField &out, int dag);
int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, \
SiteSpinor *buf, int sF, \
int sU, const FermionField &in, SiteSpinor &out,int threeLink) ;
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) ;
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);

View File

@ -216,8 +216,11 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
////////////////////////
// Call the single hop
////////////////////////
auto U_v = U.View();
auto B_v = B.View();
auto Btilde_v = Btilde.View();
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{
Kernels::DhopDirK(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
Kernels::DhopDirK(st, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma);
});
//////////////////////////////////////////////////
@ -316,9 +319,11 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
Compressor compressor(dag);
Stencil.HaloExchange(in, compressor);
auto in_v = in.View();
auto out_v = in.View();
auto Umu_v = Umu.View();
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{
Kernels::DhopDirK(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
Kernels::DhopDirK(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma);
});
};
@ -333,13 +338,16 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
st.HaloExchange(in, compressor);
int Opt = WilsonKernelsStatic::Opt;
auto U_v = U.View();
auto in_v = in.View();
auto out_v= out.View();
if (dag == DaggerYes) {
accelerator_loop( sss,in, {
Kernels::DhopSiteDag(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
accelerator_loop( sss,in_v, {
Kernels::DhopSiteDag(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
});
} else {
accelerator_loop( sss,in, {
Kernels::DhopSite(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
accelerator_loop( sss,in_v, {
Kernels::DhopSite(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
});
}
};
@ -367,15 +375,21 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
// Inefficient comms method but not performance critical.
tmp1 = Cshift(q_in_1, mu, 1);
tmp2 = Cshift(q_in_2, mu, 1);
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_in_1_v=q_in_1.View();
auto q_in_2_v=q_in_2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
Kernels::ContractConservedCurrentSiteFwd(tmp1[sU],
q_in_2[sU],
q_out[sU],
Umu, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sU],
tmp2[sU],
q_out[sU],
Umu, sU, mu);
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
q_in_2_v[sU],
q_out_v[sU],
Umu_v, sU, mu);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
tmp2_v[sU],
q_out_v[sU],
Umu_v, sU, mu);
});
}
@ -415,34 +429,40 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = ph*q_in;
tmpBwd = Cshift(tmp, mu, -1);
auto coords_v = coords.View();
auto tmpFwd_v = tmpFwd.View();
auto tmpBwd_v = tmpBwd.View();
auto Umu_v = Umu.View();
auto q_out_v = q_out.View();
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vInteger t_mask = ((coords[sU] >= tmin) &&
(coords[sU] <= tmax));
vInteger t_mask = ((coords_v[sU] >= tmin) &&
(coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask);
if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sU],
q_out[sU],
Umu, sU, mu, t_mask);
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
q_out_v[sU],
Umu_v, sU, mu, t_mask);
}
// Repeat for backward direction.
t_mask = ((coords[sU] >= (tmin + tshift)) &&
(coords[sU] <= (tmax + tshift)));
t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
(coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
timeSlices = Reduce(t_mask);
if (timeSlices > 0) {
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sU],
q_out[sU],
Umu, sU, mu, t_mask);
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
q_out_v[sU],
Umu_v, sU, mu, t_mask);
}
});
}

View File

@ -244,11 +244,14 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
assert(dirdisp<=7);
assert(dirdisp>=0);
auto Umu_v = Umu.View();
auto in_v = in.View();
auto out_v = out.View();
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
for(int s=0;s<Ls;s++){
int sU=ss;
int sF = s+Ls*sU;
Kernels::DhopDirK(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
Kernels::DhopDirK(Stencil,Umu_v,Stencil.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
}
});
};
@ -279,7 +282,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
Atilde=A;
int LLs = B.Grid()->_rdimensions[0];
DerivComputeTime-=usecond();
for (int mu = 0; mu < Nd; mu++) {
////////////////////////////////////////////////////////////////////////
@ -293,15 +295,20 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
////////////////////////
DerivDhopComputeTime -= usecond();
auto U_v = U.View();
auto Btilde_v = Btilde.View();
auto B_v = B.View();
int Bsites = B.Grid()->oSites();
int Usites = U.Grid()->oSites();
thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{
for (int s = 0; s < Ls; s++) {
int sU = sss;
int sF = s + Ls * sU;
assert(sF < B.Grid()->oSites());
assert(sU < U.Grid()->oSites());
assert(sF < Bsites);
assert(sU < Usites);
Kernels::DhopDirK(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma);
////////////////////////////
// spin trace outer product
@ -406,6 +413,9 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Ugly explicit thread mapping introduced for OPA reasons.
//////////////////////////////////////////////////////////////////////////////////////////////////////
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
{
int tid = omp_get_thread_num();
@ -435,13 +445,13 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
}
} else {
for (int ss = myblock; ss < myblock+myn; ++ss) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
}
}
ptime = usecond() - start;
@ -470,14 +480,14 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
thread_loop( (int ss = 0; ss < sz; ss++) ,{
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
});
} else {
int sz=st.surface_list.size();
thread_loop( (int ss = 0; ss < sz; ss++) ,{
int sU = st.surface_list[ss];
int sF = LLs * sU;
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
});
}
DhopComputeTime2+=usecond();
@ -505,17 +515,20 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
int Opt = WilsonKernelsStatic::Opt;
auto U_v = U.View();
auto in_v = in.View();
auto out_v = out.View();
if (dag == DaggerYes) {
accelerator_loop( ss, U, {
accelerator_loop( ss, U_v, {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
});
} else {
accelerator_loop( ss, U , {
accelerator_loop( ss, U_v , {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
});
}
DhopComputeTime+=usecond();
@ -738,10 +751,17 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
q_out = Zero();
auto q_in_1_v = q_in_1.View();
auto q_in_2_v = q_in_2.View();
auto tmp1_v = tmp1.View();
auto tmp2_v = tmp2.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
tmp1 = Cshift(q_in_1, mu + 1, 1);
tmp2 = Cshift(q_in_2, mu + 1, 1);
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
unsigned int sF1 = sU * LLs;
unsigned int sF2 = (sU + 1) * LLs - 1;
@ -755,20 +775,20 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
// If vectorised in 5th dimension, reverse q2 vector to match up
// sites correctly.
if (Impl::LsVectorised) {
REVERSE_LS(q_in_2[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2[sF2], qmuSite2, Ls / LLs);
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
} else {
qSite2 = q_in_2[sF2];
qmuSite2 = tmp2[sF2];
qSite2 = q_in_2_v[sF2];
qmuSite2 = tmp2_v[sF2];
}
Kernels::ContractConservedCurrentSiteFwd(tmp1[sF1],
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
qSite2,
q_out[sU],
Umu, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sF1],
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
qmuSite2,
q_out[sU],
Umu, sU, mu, axial_sign);
q_out_v[sU],
Umu_v, sU, mu, axial_sign);
sF1++;
sF2--;
}
@ -808,7 +828,7 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
q_out = Zero();
LatticeInteger coords(_FourDimGrid);
LatticeCoordinate(coords, Tp);
auto coords_v = coords.View();
// Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu
// by one.
tmp = Cshift(q_in, mu + 1, 1);
@ -816,11 +836,15 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = ph*q_in;
tmpBwd = Cshift(tmp, mu + 1, -1);
auto tmpBwd_v = tmpBwd.View();
auto tmpFwd_v = tmpFwd.View();
auto q_out_v = q_out.View();
auto Umu_v = Umu.View();
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{
// Compute the sequential conserved current insertion only if our simd
// object contains a timeslice we need.
vInteger t_mask = ((coords[sU] >= tmin) &&
(coords[sU] <= tmax));
vInteger t_mask = ((coords_v[sU] >= tmin) &&
(coords_v[sU] <= tmax));
Integer timeSlices = Reduce(t_mask);
if (timeSlices > 0) {
@ -828,20 +852,20 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int sF = sU * LLs;
for (unsigned int s = 0; s < LLs; ++s) {
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sF],
q_out[sF], Umu, sU,
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sF],
q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign);
++sF;
}
}
// Repeat for backward direction.
t_mask = ((coords[sU] >= (tmin + tshift)) &&
(coords[sU] <= (tmax + tshift)));
t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
(coords_v[sU] <= (tmax + tshift)));
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
unsigned int t0 = 0;
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
timeSlices = Reduce(t_mask);
@ -849,8 +873,8 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int sF = sU * LLs;
for (unsigned int s = 0; s < LLs; ++s) {
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sF],
q_out[sF], Umu, sU,
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sF],
q_out_v[sF], Umu_v, sU,
mu, t_mask, axial_sign);
++sF;
}

View File

@ -36,7 +36,7 @@ int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
template <class Impl>
accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
// Generic implementation; move to different file?
@ -103,9 +103,9 @@ accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -127,9 +127,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, Lebesg
};
template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -153,9 +153,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueO
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -178,9 +178,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, Leb
};
template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -204,9 +204,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, Lebesg
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
// SiteHalfSpinor tmp;
// SiteHalfSpinor chi;
@ -231,9 +231,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, Leb
};
template <class Impl>
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
int sU, const FermionFieldView &in, FermionFieldView &out)
{
// SiteHalfSpinor tmp;
// SiteHalfSpinor chi;
@ -258,9 +258,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, Lebesg
};
template <class Impl>
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteSpinor result;
@ -300,23 +300,23 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign)
{
q_out -= result;
}
else
{
q_out += result;
}
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
@ -330,23 +330,22 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign)
{
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign)
{
q_out += result;
}
else
{
q_out -= result;
}
SitePropagator result, tmp;
Gamma g5(Gamma::Algebra::Gamma5);
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
// G-parity requires more specialised implementation.
@ -356,7 +355,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \
SitePropagator &q_out, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
unsigned int sU, \
unsigned int mu, \
bool switch_sign) \
@ -368,7 +367,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
const SitePropagator &q_in_1, \
const SitePropagator &q_in_2, \
SitePropagator &q_out, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
unsigned int mu, \
unsigned int sU, \
bool switch_sign) \
@ -391,27 +390,25 @@ NO_CURR_SITE(GparityWilsonImplDF);
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu);
result = WilsonCurrentFwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign)
{
q_out -= result;
}
else
{
q_out += result;
}
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out -= result;
} else {
q_out += result;
}
}
/*******************************************************************************
@ -423,27 +420,24 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
template<class Impl>
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign)
{
SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
SitePropagator result;
Impl::multLinkProp(result, U[sU], q_in, mu + Nd);
result = WilsonCurrentBwd(result, mu);
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign)
{
q_out += result;
}
else
{
q_out -= result;
}
// Zero any unwanted timeslice entries.
result = predicatedWhere(t_mask, result, 0.*result);
if (switch_sign) {
q_out += result;
} else {
q_out -= result;
}
}
FermOpTemplateInstantiate(WilsonKernels);

View File

@ -50,13 +50,16 @@ public:
INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base;
typedef typename ViewMap<FermionField>::Type FermionFieldView;
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
public:
template <bool EnableBool = true> accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
@ -99,8 +102,8 @@ public:
template <bool EnableBool = true> accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) {
// no kernel choice
for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) {
@ -116,8 +119,8 @@ public:
template <bool EnableBool = true> accelerator
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
{
bgq_l1p_optimisation(1);
switch(Opt) {
@ -161,8 +164,8 @@ public:
template <bool EnableBool = true> accelerator
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) {
for (int site = 0; site < Nsite; site++) {
for (int s = 0; s < Ls; s++) {
@ -176,8 +179,8 @@ public:
}
}
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
//////////////////////////////////////////////////////////////////////////////
// Utilities for inserting Wilson conserved current.
@ -185,27 +188,27 @@ public:
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
const SitePropagator &q_in_2,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
bool switch_sign = false);
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
bool switch_sign = false);
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
SitePropagator &q_out,
DoubledGaugeField &U,
DoubledGaugeFieldView &U,
unsigned int sU,
unsigned int mu,
vInteger t_mask,
@ -213,60 +216,60 @@ public:
private:
// Specialised variants
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionField &in, FermionField &out);
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
public:

View File

@ -38,43 +38,43 @@ NAMESPACE_BEGIN(Grid);
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
template<class Impl> void
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
{
assert(0);
}
@ -83,21 +83,21 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
\
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
INSTANTIATE_ASM(WilsonImplF);
INSTANTIATE_ASM(WilsonImplD);

View File

@ -573,8 +573,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<class Impl> void
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -600,8 +600,8 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -626,8 +626,8 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -654,8 +654,8 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -681,8 +681,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
}
template<class Impl> void
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
typedef typename Simd::scalar_type S;
@ -711,8 +711,8 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
}
template<class Impl>
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionField &in, FermionField &out)
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
@ -746,58 +746,58 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
const FermionFieldView &in, \
FermionFieldView &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
const FermionFieldView &in, \
FermionFieldView &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
const FermionFieldView &in, \
FermionFieldView &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
const FermionFieldView &in, \
FermionFieldView &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
const FermionFieldView &in, \
FermionFieldView &out){ assert(0); } \
template<> void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
LebesgueOrder &lo, \
DoubledGaugeField &U, \
DoubledGaugeFieldView &U, \
SiteHalfSpinor *buf, \
int sF,int sU, \
const FermionField &in, \
FermionField &out){ assert(0); } \
const FermionFieldView &in, \
FermionFieldView &out){ assert(0); } \
#define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -812,8 +812,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -828,8 +828,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -844,8 +844,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \
\
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -860,8 +860,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
} \
\
template<> void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -877,8 +877,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \
template<> \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out) \
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \
typedef IMPL Impl; \
typedef typename Simd::scalar_type S; \
@ -904,18 +904,18 @@ HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
////////////// Wilson ; uses this implementation /////////////////////
#define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionField &in, FermionField &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);\
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);