mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Introduce Views to prepare for accelerator offload.
The same problem probably exists for the stencil object.
This commit is contained in:
parent
442b0b406c
commit
3277bda130
@ -38,17 +38,20 @@ NAMESPACE_BEGIN(Grid);
|
||||
// Pminus forwards
|
||||
// Pplus backwards..
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
const FermionField &phi_i,
|
||||
FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower,
|
||||
std::vector<Coeff_t> &diag,
|
||||
std::vector<Coeff_t> &upper)
|
||||
{
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls =this->Ls;
|
||||
GridBase *grid=psi.Grid();
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
M5Dcalls++;
|
||||
M5Dtime-=usecond();
|
||||
@ -81,17 +84,20 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
const FermionField &phi_i,
|
||||
FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower,
|
||||
std::vector<Coeff_t> &diag,
|
||||
std::vector<Coeff_t> &upper)
|
||||
{
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls =this->Ls;
|
||||
GridBase *grid=psi.Grid();
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
M5Dcalls++;
|
||||
@ -125,12 +131,14 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
|
||||
{
|
||||
GridBase *grid=psi.Grid();
|
||||
int Ls=this->Ls;
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
int Ls=this->Ls;
|
||||
|
||||
MooeeInvCalls++;
|
||||
MooeeInvTime-=usecond();
|
||||
@ -170,13 +178,15 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
|
||||
{
|
||||
GridBase *grid=psi.Grid();
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
int Ls=this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
std::vector<Coeff_t> ueec(Ls);
|
||||
std::vector<Coeff_t> deec(Ls);
|
||||
|
@ -51,14 +51,18 @@ void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
||||
}
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
const FermionField &phi_i,
|
||||
FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower,
|
||||
std::vector<Coeff_t> &diag,
|
||||
std::vector<Coeff_t> &upper)
|
||||
{
|
||||
GridBase *grid=psi.Grid();
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd= Simd::Nsimd();
|
||||
@ -70,8 +74,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||
assert(Ls/LLs==nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type * u_p = (scalar_type *)&u[0];
|
||||
@ -195,14 +197,18 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||
const FermionField &phi,
|
||||
FermionField &chi,
|
||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
const FermionField &phi_i,
|
||||
FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower,
|
||||
std::vector<Coeff_t> &diag,
|
||||
std::vector<Coeff_t> &upper)
|
||||
{
|
||||
GridBase *grid=psi.Grid();
|
||||
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||
GridBase *grid=psi_i.Grid();
|
||||
auto psi=psi_i.View();
|
||||
auto phi=phi_i.View();
|
||||
auto chi=chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd= Simd::Nsimd();
|
||||
@ -214,8 +220,6 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||
assert(Ls/LLs==nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type * u_p = (scalar_type *)&u[0];
|
||||
@ -339,11 +343,13 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi,
|
||||
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i,
|
||||
int LLs, int site,
|
||||
Vector<iSinglet<Simd> > &Matp,
|
||||
Vector<iSinglet<Simd> > &Matm)
|
||||
{
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
@ -513,11 +519,14 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
|
||||
|
||||
// Z-mobius version
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi,
|
||||
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i,
|
||||
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
|
||||
{
|
||||
#ifndef AVX512
|
||||
{
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
SiteHalfSpinor BcastP;
|
||||
SiteHalfSpinor BcastM;
|
||||
SiteHalfSpinor SiteChiP;
|
||||
@ -761,11 +770,12 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||
{
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
int Ls=this->Ls;
|
||||
int LLs = psi.Grid()->_rdimensions[0];
|
||||
int vol = psi.Grid()->oSites()/LLs;
|
||||
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
Vector<iSinglet<Simd> > Matp;
|
||||
Vector<iSinglet<Simd> > Matm;
|
||||
|
@ -40,14 +40,16 @@ NAMESPACE_BEGIN(Grid);
|
||||
// Pminus forwards
|
||||
// Pplus backwards..
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi.Grid();
|
||||
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto phi = phi_i.View();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
@ -78,13 +80,17 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
GridBase* grid = psi.Grid();
|
||||
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard()=psi.Checkerboard();
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
@ -116,16 +122,16 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
|
||||
{
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi=psi_i.View();
|
||||
auto chi=chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
thread_loop((int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
|
||||
|
||||
auto tmp1 = psi[0];
|
||||
@ -164,13 +170,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
|
||||
{
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
|
||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
std::vector<Coeff_t> ueec(Ls);
|
||||
std::vector<Coeff_t> deec(Ls+1);
|
||||
|
@ -52,10 +52,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
@ -67,8 +72,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
@ -197,10 +200,15 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
@ -212,8 +220,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
@ -342,9 +348,12 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
@ -532,12 +541,11 @@ void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fer
|
||||
template<class Impl>
|
||||
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi.Grid()->_rdimensions[0];
|
||||
int vol = psi.Grid()->oSites()/LLs;
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
Vector<iSinglet<Simd> > Matp;
|
||||
Vector<iSinglet<Simd> > Matm;
|
||||
Vector<iSinglet<Simd> > *_Matp;
|
||||
|
@ -149,7 +149,7 @@ public:
|
||||
typedef typename Impl::Compressor Compressor; \
|
||||
typedef typename Impl::StencilImpl StencilImpl; \
|
||||
typedef typename Impl::ImplParams ImplParams; \
|
||||
typedef typename Impl::Coeff_t Coeff_t; \
|
||||
typedef typename Impl::Coeff_t Coeff_t;
|
||||
|
||||
#define INHERIT_IMPL_TYPES(Base) \
|
||||
INHERIT_GIMPL_TYPES(Base) \
|
||||
@ -267,12 +267,14 @@ public:
|
||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||
GaugeLinkField tmp(mat.Grid());
|
||||
tmp = Zero();
|
||||
|
||||
auto tmp_v = tmp.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
auto Atilde_v = Atilde.View();
|
||||
thread_loop( (int sss=0;sss<tmp.Grid()->oSites();sss++),{
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||
tmp_v[sU] = tmp_v[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here
|
||||
}
|
||||
});
|
||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||
@ -499,13 +501,10 @@ public:
|
||||
|
||||
|
||||
const int Nsimd =vector_type::Nsimd();
|
||||
// const int Nsimd = grid->Nsimd();
|
||||
|
||||
GridBase *grid= St.Grid();
|
||||
int direction = St._directions[mu];
|
||||
int distance = St._distances[mu];
|
||||
int ptype = St._permute_type[mu];
|
||||
int sl = grid->_simd_layout[direction];
|
||||
int sl = St._simd_layout[direction];
|
||||
|
||||
// Fixme X.Y.Z.T hardcode in stencil
|
||||
int mmu = mu % Nd;
|
||||
@ -524,7 +523,7 @@ public:
|
||||
extract(chi,vals);
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,s);
|
||||
St.iCoorFromIindex(icoor,s);
|
||||
|
||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||
|
||||
@ -592,9 +591,13 @@ public:
|
||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||
}
|
||||
|
||||
thread_loop( (auto ss=U.begin();ss<U.end();ss++),{
|
||||
Uds[ss](0)(mu) = U[ss]();
|
||||
Uds[ss](1)(mu) = Uconj[ss]();
|
||||
auto U_v = U.View();
|
||||
auto Uds_v = Uds.View();
|
||||
auto Uconj_v = Uconj.View();
|
||||
auto Utmp_v= Utmp.View();
|
||||
thread_loop( (auto ss=U_v.begin();ss<U_v.end();ss++),{
|
||||
Uds_v[ss](0)(mu) = U_v[ss]();
|
||||
Uds_v[ss](1)(mu) = Uconj_v[ss]();
|
||||
});
|
||||
|
||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||
@ -605,9 +608,8 @@ public:
|
||||
Utmp = where(coor==0,Uconj,Utmp);
|
||||
}
|
||||
|
||||
|
||||
thread_loop((auto ss=U.begin();ss<U.end();ss++),{
|
||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
|
||||
Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
|
||||
});
|
||||
|
||||
Utmp = Uconj;
|
||||
@ -615,8 +617,8 @@ public:
|
||||
Utmp = where(coor==0,U,Utmp);
|
||||
}
|
||||
|
||||
thread_loop((auto ss=U.begin();ss<U.end();ss++),{
|
||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
|
||||
Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
|
||||
});
|
||||
|
||||
}
|
||||
@ -628,8 +630,10 @@ public:
|
||||
GaugeLinkField link(mat.Grid());
|
||||
// use lorentz for flavour as hack.
|
||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||
thread_loop((auto ss = tmp.begin(); ss < tmp.end(); ss++), {
|
||||
link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
|
||||
auto link_v = link.View();
|
||||
auto tmp_v = tmp.View();
|
||||
thread_loop((auto ss = tmp_v.begin(); ss < tmp_v.end(); ss++), {
|
||||
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
|
||||
});
|
||||
PokeIndex<LorentzIndex>(mat, link, mu);
|
||||
return;
|
||||
@ -641,11 +645,14 @@ public:
|
||||
|
||||
GaugeLinkField tmp(mat.Grid());
|
||||
tmp = Zero();
|
||||
auto tmp_v = tmp.View();
|
||||
auto Atilde_v = Atilde.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
int sF = s + Ls * ss;
|
||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
|
||||
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
|
||||
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||
}
|
||||
});
|
||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||
|
@ -249,8 +249,12 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
|
||||
////////////////////////
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto B_v = B.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
|
||||
Kernels::DhopDirK(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
|
||||
Kernels::DhopDirK(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
||||
});
|
||||
|
||||
// Force in three link terms
|
||||
@ -360,9 +364,12 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in, compressor);
|
||||
|
||||
auto Umu_v = Umu.View();
|
||||
auto UUUmu_v = UUUmu.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
|
||||
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
|
||||
Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
||||
});
|
||||
};
|
||||
|
||||
@ -377,13 +384,17 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
|
||||
Compressor compressor;
|
||||
st.HaloExchange(in, compressor);
|
||||
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
if (dag == DaggerYes) {
|
||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
|
||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
||||
});
|
||||
} else {
|
||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
|
||||
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
@ -171,12 +171,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
|
||||
|
||||
Compressor compressor;
|
||||
Stencil.HaloExchange(in,compressor);
|
||||
|
||||
auto Umu_v = Umu.View();
|
||||
auto UUUmu_v = UUUmu.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = in.View();
|
||||
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
|
||||
Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
|
||||
}
|
||||
});
|
||||
};
|
||||
@ -230,24 +233,26 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
||||
Compressor compressor;
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
|
||||
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
DhopCommTime -= usecond();
|
||||
st.HaloExchange(in,compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
auto U_v = U.View();
|
||||
auto UUU_v = UUU.View();
|
||||
auto out_v = out.View();
|
||||
auto in_v = in.View();
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
if (dag == DaggerYes) {
|
||||
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
|
||||
int sU=ss;
|
||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
|
||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
|
||||
});
|
||||
} else {
|
||||
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
|
||||
int sU=ss;
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
|
||||
});
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
|
@ -35,16 +35,18 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
GridBase *grid = psi.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
@ -76,16 +78,20 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &p
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||
std::vector<Coeff_t> &shift_coeffs)
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
GridBase *grid = psi.Grid();
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
@ -120,14 +126,17 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionFi
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
GridBase *grid = psi.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
@ -159,16 +168,19 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||
std::vector<Coeff_t> &shift_coeffs)
|
||||
{
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||
GridBase *grid = psi.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
@ -204,14 +216,15 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
|
||||
|
||||
GridBase *grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
@ -251,12 +264,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &ch
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
|
||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
|
||||
{
|
||||
GridBase *grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
@ -306,14 +321,15 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionFie
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
|
||||
{
|
||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
|
||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
|
||||
|
||||
GridBase *grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
@ -353,12 +369,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
|
||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
|
||||
{
|
||||
GridBase *grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase *grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
@ -63,10 +63,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, Fermion
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
@ -78,8 +82,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
scalar_type* u_p = (scalar_type*) &u[0];
|
||||
@ -208,11 +210,14 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi_i, const FermionField& phi_i,
|
||||
FermionField& chi_i, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
#if 0
|
||||
auto & psi = psi_i;
|
||||
auto & phi = phi_i;
|
||||
auto & chi = chi_i;
|
||||
|
||||
this->M5D(psi, phi, chi, lower, diag, upper);
|
||||
|
||||
@ -225,8 +230,11 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
const int nsimd = Simd::Nsimd();
|
||||
@ -239,7 +247,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
@ -389,10 +396,14 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||
{
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
@ -404,7 +415,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
@ -531,12 +541,14 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
|
||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
|
||||
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||
std::vector<Coeff_t>& shift_coeffs)
|
||||
{
|
||||
#if 0
|
||||
|
||||
auto & psi = psi_i;
|
||||
auto & phi = phi_i;
|
||||
auto & chi = chi_i;
|
||||
this->M5Ddag(psi, phi, chi, lower, diag, upper);
|
||||
|
||||
// FIXME: possible gain from vectorizing shift operation as well?
|
||||
@ -548,8 +560,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
GridBase* grid = psi.Grid();
|
||||
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||
GridBase* grid = psi_i.Grid();
|
||||
auto psi = psi_i.View();
|
||||
auto phi = phi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
int Ls = this->Ls;
|
||||
int LLs = grid->_rdimensions[0];
|
||||
int nsimd = Simd::Nsimd();
|
||||
@ -562,7 +577,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
||||
assert(Ls/LLs == nsimd);
|
||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
// just directly address via type pun
|
||||
typedef typename Simd::scalar_type scalar_type;
|
||||
@ -717,9 +731,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
||||
#endif
|
||||
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
|
||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||
{
|
||||
auto psi = psi_i.View();
|
||||
auto chi = chi_i.View();
|
||||
#ifndef AVX512
|
||||
{
|
||||
SiteHalfSpinor BcastP;
|
||||
@ -909,12 +925,12 @@ void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fermion
|
||||
template<class Impl>
|
||||
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||
{
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
int Ls = this->Ls;
|
||||
int LLs = psi.Grid()->_rdimensions[0];
|
||||
int vol = psi.Grid()->oSites()/LLs;
|
||||
|
||||
chi.Checkerboard() = psi.Checkerboard();
|
||||
|
||||
Vector<iSinglet<Simd>> Matp;
|
||||
Vector<iSinglet<Simd>> Matm;
|
||||
Vector<iSinglet<Simd>>* _Matp;
|
||||
|
@ -40,9 +40,9 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
||||
////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
|
||||
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
@ -183,9 +183,9 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out) {
|
||||
const FermionFieldView &in, FermionFieldView &out) {
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
int oneLink =0;
|
||||
@ -221,9 +221,9 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
@ -258,8 +258,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int disp)
|
||||
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
|
||||
{
|
||||
// Disp should be either +1,-1,+3,-3
|
||||
// What about "dag" ?
|
||||
|
@ -47,29 +47,33 @@ public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
typedef typename ViewMap<FermionField>::Type FermionFieldView;
|
||||
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
void DhopDirK(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
|
||||
void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
||||
|
||||
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
|
||||
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink);
|
||||
|
||||
|
||||
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
|
||||
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, SiteSpinor&out,int threeLink);
|
||||
|
||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
|
||||
int LLs, int sU, const FermionField &in, FermionField &out, int dag);
|
||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,SiteSpinor * buf,
|
||||
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
|
||||
|
||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
|
||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf,
|
||||
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
public:
|
||||
|
||||
|
@ -580,10 +580,10 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
};
|
||||
@ -644,10 +644,10 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
#include <simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -694,10 +694,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -775,10 +775,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
|
||||
#include <simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -840,10 +840,10 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
DoubledGaugeFieldView &U,
|
||||
DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -905,10 +905,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
||||
|
||||
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
|
||||
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeField &UUU, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
DoubledGaugeFieldView &UUU, \
|
||||
SiteSpinor *buf, int LLs, \
|
||||
int sU, const FermionField &in, FermionField &out);
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
|
||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
|
||||
|
@ -89,9 +89,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out, int dag)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||
{
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
@ -110,9 +110,9 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, Do
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
|
||||
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
@ -298,14 +298,14 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
||||
|
||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
||||
SiteSpinor *buf, int LLs, \
|
||||
int sU, const FermionField &in, FermionField &out, int dag);
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
|
||||
|
||||
#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, \
|
||||
SiteSpinor *buf, int sF, \
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) ;
|
||||
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) ;
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
|
||||
|
@ -216,8 +216,11 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
////////////////////////
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
auto U_v = U.View();
|
||||
auto B_v = B.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{
|
||||
Kernels::DhopDirK(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
|
||||
Kernels::DhopDirK(st, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma);
|
||||
});
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
@ -316,9 +319,11 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||
Compressor compressor(dag);
|
||||
|
||||
Stencil.HaloExchange(in, compressor);
|
||||
|
||||
auto in_v = in.View();
|
||||
auto out_v = in.View();
|
||||
auto Umu_v = Umu.View();
|
||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{
|
||||
Kernels::DhopDirK(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
|
||||
Kernels::DhopDirK(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma);
|
||||
});
|
||||
};
|
||||
|
||||
@ -333,13 +338,16 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
st.HaloExchange(in, compressor);
|
||||
|
||||
int Opt = WilsonKernelsStatic::Opt;
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v= out.View();
|
||||
if (dag == DaggerYes) {
|
||||
accelerator_loop( sss,in, {
|
||||
Kernels::DhopSiteDag(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
accelerator_loop( sss,in_v, {
|
||||
Kernels::DhopSiteDag(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
|
||||
});
|
||||
} else {
|
||||
accelerator_loop( sss,in, {
|
||||
Kernels::DhopSite(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
accelerator_loop( sss,in_v, {
|
||||
Kernels::DhopSite(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
|
||||
});
|
||||
}
|
||||
};
|
||||
@ -367,15 +375,21 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
// Inefficient comms method but not performance critical.
|
||||
tmp1 = Cshift(q_in_1, mu, 1);
|
||||
tmp2 = Cshift(q_in_2, mu, 1);
|
||||
auto tmp1_v = tmp1.View();
|
||||
auto tmp2_v = tmp2.View();
|
||||
auto q_in_1_v=q_in_1.View();
|
||||
auto q_in_2_v=q_in_2.View();
|
||||
auto q_out_v = q_out.View();
|
||||
auto Umu_v = Umu.View();
|
||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1[sU],
|
||||
q_in_2[sU],
|
||||
q_out[sU],
|
||||
Umu, sU, mu);
|
||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sU],
|
||||
tmp2[sU],
|
||||
q_out[sU],
|
||||
Umu, sU, mu);
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
|
||||
q_in_2_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu);
|
||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
|
||||
tmp2_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu);
|
||||
});
|
||||
}
|
||||
|
||||
@ -415,34 +429,40 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
tmp = ph*q_in;
|
||||
tmpBwd = Cshift(tmp, mu, -1);
|
||||
|
||||
auto coords_v = coords.View();
|
||||
auto tmpFwd_v = tmpFwd.View();
|
||||
auto tmpBwd_v = tmpBwd.View();
|
||||
auto Umu_v = Umu.View();
|
||||
auto q_out_v = q_out.View();
|
||||
|
||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
||||
|
||||
// Compute the sequential conserved current insertion only if our simd
|
||||
// object contains a timeslice we need.
|
||||
vInteger t_mask = ((coords[sU] >= tmin) &&
|
||||
(coords[sU] <= tmax));
|
||||
vInteger t_mask = ((coords_v[sU] >= tmin) &&
|
||||
(coords_v[sU] <= tmax));
|
||||
Integer timeSlices = Reduce(t_mask);
|
||||
|
||||
if (timeSlices > 0) {
|
||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sU],
|
||||
q_out[sU],
|
||||
Umu, sU, mu, t_mask);
|
||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, t_mask);
|
||||
}
|
||||
|
||||
// Repeat for backward direction.
|
||||
t_mask = ((coords[sU] >= (tmin + tshift)) &&
|
||||
(coords[sU] <= (tmax + tshift)));
|
||||
t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
|
||||
(coords_v[sU] <= (tmax + tshift)));
|
||||
|
||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||
unsigned int t0 = 0;
|
||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
|
||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
|
||||
|
||||
timeSlices = Reduce(t_mask);
|
||||
|
||||
if (timeSlices > 0) {
|
||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sU],
|
||||
q_out[sU],
|
||||
Umu, sU, mu, t_mask);
|
||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, t_mask);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -244,11 +244,14 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
|
||||
assert(dirdisp<=7);
|
||||
assert(dirdisp>=0);
|
||||
|
||||
auto Umu_v = Umu.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DhopDirK(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
|
||||
Kernels::DhopDirK(Stencil,Umu_v,Stencil.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
|
||||
}
|
||||
});
|
||||
};
|
||||
@ -279,7 +282,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
Atilde=A;
|
||||
int LLs = B.Grid()->_rdimensions[0];
|
||||
|
||||
|
||||
DerivComputeTime-=usecond();
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -293,15 +295,20 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
////////////////////////
|
||||
|
||||
DerivDhopComputeTime -= usecond();
|
||||
auto U_v = U.View();
|
||||
auto Btilde_v = Btilde.View();
|
||||
auto B_v = B.View();
|
||||
int Bsites = B.Grid()->oSites();
|
||||
int Usites = U.Grid()->oSites();
|
||||
thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
int sU = sss;
|
||||
int sF = s + Ls * sU;
|
||||
|
||||
assert(sF < B.Grid()->oSites());
|
||||
assert(sU < U.Grid()->oSites());
|
||||
assert(sF < Bsites);
|
||||
assert(sU < Usites);
|
||||
|
||||
Kernels::DhopDirK(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
|
||||
Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma);
|
||||
|
||||
////////////////////////////
|
||||
// spin trace outer product
|
||||
@ -406,6 +413,9 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
@ -435,13 +445,13 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
|
||||
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
|
||||
}
|
||||
} else {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
|
||||
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
|
||||
}
|
||||
}
|
||||
ptime = usecond() - start;
|
||||
@ -470,14 +480,14 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
thread_loop( (int ss = 0; ss < sz; ss++) ,{
|
||||
int sU = st.surface_list[ss];
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
||||
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
|
||||
});
|
||||
} else {
|
||||
int sz=st.surface_list.size();
|
||||
thread_loop( (int ss = 0; ss < sz; ss++) ,{
|
||||
int sU = st.surface_list[ss];
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
||||
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
|
||||
});
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
@ -505,17 +515,20 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
|
||||
int Opt = WilsonKernelsStatic::Opt;
|
||||
auto U_v = U.View();
|
||||
auto in_v = in.View();
|
||||
auto out_v = out.View();
|
||||
if (dag == DaggerYes) {
|
||||
accelerator_loop( ss, U, {
|
||||
accelerator_loop( ss, U_v, {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
|
||||
});
|
||||
} else {
|
||||
accelerator_loop( ss, U , {
|
||||
accelerator_loop( ss, U_v , {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
|
||||
});
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
@ -738,10 +751,17 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
|
||||
q_out = Zero();
|
||||
|
||||
auto q_in_1_v = q_in_1.View();
|
||||
auto q_in_2_v = q_in_2.View();
|
||||
auto tmp1_v = tmp1.View();
|
||||
auto tmp2_v = tmp2.View();
|
||||
auto q_out_v = q_out.View();
|
||||
auto Umu_v = Umu.View();
|
||||
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
|
||||
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
|
||||
tmp1 = Cshift(q_in_1, mu + 1, 1);
|
||||
tmp2 = Cshift(q_in_2, mu + 1, 1);
|
||||
|
||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
||||
unsigned int sF1 = sU * LLs;
|
||||
unsigned int sF2 = (sU + 1) * LLs - 1;
|
||||
@ -755,20 +775,20 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
||||
// If vectorised in 5th dimension, reverse q2 vector to match up
|
||||
// sites correctly.
|
||||
if (Impl::LsVectorised) {
|
||||
REVERSE_LS(q_in_2[sF2], qSite2, Ls / LLs);
|
||||
REVERSE_LS(tmp2[sF2], qmuSite2, Ls / LLs);
|
||||
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
|
||||
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
|
||||
} else {
|
||||
qSite2 = q_in_2[sF2];
|
||||
qmuSite2 = tmp2[sF2];
|
||||
qSite2 = q_in_2_v[sF2];
|
||||
qmuSite2 = tmp2_v[sF2];
|
||||
}
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1[sF1],
|
||||
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
|
||||
qSite2,
|
||||
q_out[sU],
|
||||
Umu, sU, mu, axial_sign);
|
||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sF1],
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, axial_sign);
|
||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
|
||||
qmuSite2,
|
||||
q_out[sU],
|
||||
Umu, sU, mu, axial_sign);
|
||||
q_out_v[sU],
|
||||
Umu_v, sU, mu, axial_sign);
|
||||
sF1++;
|
||||
sF2--;
|
||||
}
|
||||
@ -808,7 +828,7 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
q_out = Zero();
|
||||
LatticeInteger coords(_FourDimGrid);
|
||||
LatticeCoordinate(coords, Tp);
|
||||
|
||||
auto coords_v = coords.View();
|
||||
// Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu
|
||||
// by one.
|
||||
tmp = Cshift(q_in, mu + 1, 1);
|
||||
@ -816,11 +836,15 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
tmp = ph*q_in;
|
||||
tmpBwd = Cshift(tmp, mu + 1, -1);
|
||||
|
||||
auto tmpBwd_v = tmpBwd.View();
|
||||
auto tmpFwd_v = tmpFwd.View();
|
||||
auto q_out_v = q_out.View();
|
||||
auto Umu_v = Umu.View();
|
||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{
|
||||
// Compute the sequential conserved current insertion only if our simd
|
||||
// object contains a timeslice we need.
|
||||
vInteger t_mask = ((coords[sU] >= tmin) &&
|
||||
(coords[sU] <= tmax));
|
||||
vInteger t_mask = ((coords_v[sU] >= tmin) &&
|
||||
(coords_v[sU] <= tmax));
|
||||
Integer timeSlices = Reduce(t_mask);
|
||||
|
||||
if (timeSlices > 0) {
|
||||
@ -828,20 +852,20 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
unsigned int sF = sU * LLs;
|
||||
for (unsigned int s = 0; s < LLs; ++s) {
|
||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sF],
|
||||
q_out[sF], Umu, sU,
|
||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sF],
|
||||
q_out_v[sF], Umu_v, sU,
|
||||
mu, t_mask, axial_sign);
|
||||
++sF;
|
||||
}
|
||||
}
|
||||
|
||||
// Repeat for backward direction.
|
||||
t_mask = ((coords[sU] >= (tmin + tshift)) &&
|
||||
(coords[sU] <= (tmax + tshift)));
|
||||
t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
|
||||
(coords_v[sU] <= (tmax + tshift)));
|
||||
|
||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||
unsigned int t0 = 0;
|
||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
|
||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
|
||||
|
||||
timeSlices = Reduce(t_mask);
|
||||
|
||||
@ -849,8 +873,8 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
unsigned int sF = sU * LLs;
|
||||
for (unsigned int s = 0; s < LLs; ++s) {
|
||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sF],
|
||||
q_out[sF], Umu, sU,
|
||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sF],
|
||||
q_out_v[sF], Umu_v, sU,
|
||||
mu, t_mask, axial_sign);
|
||||
++sF;
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
|
||||
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
|
||||
|
||||
template <class Impl>
|
||||
accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
@ -103,9 +103,9 @@ accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||
// All legs kernels ; comms then compute
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
@ -127,9 +127,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, Lebesg
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
@ -153,9 +153,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueO
|
||||
// Interior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
@ -178,9 +178,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, Leb
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
@ -204,9 +204,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, Lebesg
|
||||
// Exterior kernels
|
||||
////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
// SiteHalfSpinor tmp;
|
||||
// SiteHalfSpinor chi;
|
||||
@ -231,9 +231,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, Leb
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
// SiteHalfSpinor tmp;
|
||||
// SiteHalfSpinor chi;
|
||||
@ -258,9 +258,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, Lebesg
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
||||
|
||||
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteSpinor result;
|
||||
@ -300,21 +300,21 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
|
||||
const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result, tmp;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
|
||||
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
|
||||
|
||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
||||
if (switch_sign)
|
||||
{
|
||||
|
||||
if (switch_sign) {
|
||||
q_out -= result;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
q_out += result;
|
||||
}
|
||||
}
|
||||
@ -330,21 +330,20 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
|
||||
const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result, tmp;
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
|
||||
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
|
||||
|
||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
||||
if (switch_sign)
|
||||
{
|
||||
if (switch_sign) {
|
||||
q_out += result;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
q_out -= result;
|
||||
}
|
||||
}
|
||||
@ -356,7 +355,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
|
||||
const SitePropagator &q_in_1, \
|
||||
const SitePropagator &q_in_2, \
|
||||
SitePropagator &q_out, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
unsigned int sU, \
|
||||
unsigned int mu, \
|
||||
bool switch_sign) \
|
||||
@ -368,7 +367,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
|
||||
const SitePropagator &q_in_1, \
|
||||
const SitePropagator &q_in_2, \
|
||||
SitePropagator &q_out, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
unsigned int mu, \
|
||||
unsigned int sU, \
|
||||
bool switch_sign) \
|
||||
@ -391,25 +390,23 @@ NO_CURR_SITE(GparityWilsonImplDF);
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
bool switch_sign)
|
||||
{
|
||||
SitePropagator result;
|
||||
|
||||
Impl::multLinkProp(result, U[sU], q_in, mu);
|
||||
result = WilsonCurrentFwd(result, mu);
|
||||
|
||||
// Zero any unwanted timeslice entries.
|
||||
result = predicatedWhere(t_mask, result, 0.*result);
|
||||
|
||||
if (switch_sign)
|
||||
{
|
||||
if (switch_sign) {
|
||||
q_out -= result;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
q_out += result;
|
||||
}
|
||||
}
|
||||
@ -423,7 +420,7 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
@ -436,12 +433,9 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
// Zero any unwanted timeslice entries.
|
||||
result = predicatedWhere(t_mask, result, 0.*result);
|
||||
|
||||
if (switch_sign)
|
||||
{
|
||||
if (switch_sign) {
|
||||
q_out += result;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
q_out -= result;
|
||||
}
|
||||
}
|
||||
|
@ -51,12 +51,15 @@ public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
typedef typename ViewMap<FermionField>::Type FermionFieldView;
|
||||
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
|
||||
|
||||
public:
|
||||
|
||||
template <bool EnableBool = true> accelerator
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
||||
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
||||
{
|
||||
bgq_l1p_optimisation(1);
|
||||
switch(Opt) {
|
||||
@ -99,8 +102,8 @@ public:
|
||||
|
||||
template <bool EnableBool = true> accelerator
|
||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
|
||||
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) {
|
||||
// no kernel choice
|
||||
for (int site = 0; site < Nsite; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
@ -116,8 +119,8 @@ public:
|
||||
|
||||
template <bool EnableBool = true> accelerator
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
||||
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
||||
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
||||
{
|
||||
bgq_l1p_optimisation(1);
|
||||
switch(Opt) {
|
||||
@ -161,8 +164,8 @@ public:
|
||||
|
||||
template <bool EnableBool = true> accelerator
|
||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
||||
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
|
||||
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) {
|
||||
|
||||
for (int site = 0; site < Nsite; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
@ -176,8 +179,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Utilities for inserting Wilson conserved current.
|
||||
@ -185,27 +188,27 @@ public:
|
||||
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign = false);
|
||||
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
|
||||
const SitePropagator &q_in_2,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
bool switch_sign = false);
|
||||
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
bool switch_sign = false);
|
||||
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||
SitePropagator &q_out,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeFieldView &U,
|
||||
unsigned int sU,
|
||||
unsigned int mu,
|
||||
vInteger t_mask,
|
||||
@ -213,60 +216,60 @@ public:
|
||||
|
||||
private:
|
||||
// Specialised variants
|
||||
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
||||
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
|
||||
|
||||
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
||||
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
||||
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
|
||||
|
||||
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
||||
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
||||
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
|
||||
|
||||
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
||||
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
|
||||
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
public:
|
||||
|
||||
|
@ -38,43 +38,43 @@ NAMESPACE_BEGIN(Grid);
|
||||
// Default to no assembler implementation
|
||||
///////////////////////////////////////////////////////////
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
@ -83,21 +83,21 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
|
||||
|
||||
#define INSTANTIATE_ASM(A)\
|
||||
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||
\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||
\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||
\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||
|
||||
INSTANTIATE_ASM(WilsonImplF);
|
||||
INSTANTIATE_ASM(WilsonImplD);
|
||||
|
@ -573,8 +573,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -600,8 +600,8 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
@ -626,8 +626,8 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -654,8 +654,8 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
@ -681,8 +681,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -711,8 +711,8 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
@ -746,58 +746,58 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
const FermionFieldView &in, \
|
||||
FermionFieldView &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
const FermionFieldView &in, \
|
||||
FermionFieldView &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
const FermionFieldView &in, \
|
||||
FermionFieldView &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
const FermionFieldView &in, \
|
||||
FermionFieldView &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
const FermionFieldView &in, \
|
||||
FermionFieldView &out){ assert(0); } \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
|
||||
LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeFieldView &U, \
|
||||
SiteHalfSpinor *buf, \
|
||||
int sF,int sU, \
|
||||
const FermionField &in, \
|
||||
FermionField &out){ assert(0); } \
|
||||
const FermionFieldView &in, \
|
||||
FermionFieldView &out){ assert(0); } \
|
||||
|
||||
|
||||
|
||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
@ -812,8 +812,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
@ -828,8 +828,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
@ -844,8 +844,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
} \
|
||||
\
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
@ -860,8 +860,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
} \
|
||||
\
|
||||
template<> void \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
@ -877,8 +877,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||
} \
|
||||
template<> \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||
{ \
|
||||
typedef IMPL Impl; \
|
||||
typedef typename Simd::scalar_type S; \
|
||||
@ -904,18 +904,18 @@ HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);\
|
||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
|
||||
|
||||
INSTANTIATE_THEM(WilsonImplF);
|
||||
INSTANTIATE_THEM(WilsonImplD);
|
||||
|
Loading…
Reference in New Issue
Block a user