mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
View introduction to prepare for accelerator offload.
The same problem probably exists for the stencil object.
This commit is contained in:
parent
442b0b406c
commit
3277bda130
@ -38,17 +38,20 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Pminus forwards
|
// Pminus forwards
|
||||||
// Pplus backwards..
|
// Pplus backwards..
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||||
const FermionField &phi,
|
const FermionField &phi_i,
|
||||||
FermionField &chi,
|
FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<Coeff_t> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<Coeff_t> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||||
|
GridBase *grid=psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
GridBase *grid=psi.Grid();
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
M5Dcalls++;
|
M5Dcalls++;
|
||||||
M5Dtime-=usecond();
|
M5Dtime-=usecond();
|
||||||
@ -81,17 +84,20 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||||
const FermionField &phi,
|
const FermionField &phi_i,
|
||||||
FermionField &chi,
|
FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<Coeff_t> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<Coeff_t> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||||
|
GridBase *grid=psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
GridBase *grid=psi.Grid();
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
M5Dcalls++;
|
M5Dcalls++;
|
||||||
@ -125,12 +131,14 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi_i)
|
||||||
{
|
{
|
||||||
GridBase *grid=psi.Grid();
|
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||||
int Ls=this->Ls;
|
GridBase *grid=psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
int Ls=this->Ls;
|
||||||
|
|
||||||
MooeeInvCalls++;
|
MooeeInvCalls++;
|
||||||
MooeeInvTime-=usecond();
|
MooeeInvTime-=usecond();
|
||||||
@ -170,13 +178,15 @@ void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i)
|
||||||
{
|
{
|
||||||
GridBase *grid=psi.Grid();
|
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||||
|
GridBase *grid=psi_i.Grid();
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
|
|
||||||
std::vector<Coeff_t> ueec(Ls);
|
std::vector<Coeff_t> ueec(Ls);
|
||||||
std::vector<Coeff_t> deec(Ls);
|
std::vector<Coeff_t> deec(Ls);
|
||||||
|
@ -51,14 +51,18 @@ void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
|||||||
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||||
const FermionField &phi,
|
const FermionField &phi_i,
|
||||||
FermionField &chi,
|
FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<Coeff_t> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<Coeff_t> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
GridBase *grid=psi.Grid();
|
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||||
|
GridBase *grid=psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
const int nsimd= Simd::Nsimd();
|
const int nsimd= Simd::Nsimd();
|
||||||
@ -70,8 +74,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
assert(Ls/LLs==nsimd);
|
assert(Ls/LLs==nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
scalar_type * u_p = (scalar_type *)&u[0];
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
@ -124,7 +126,7 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for(int v=0;v<LLs;v++){
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
vprefetch(psi[ss+v+LLs]);
|
vprefetch(psi[ss+v+LLs]);
|
||||||
|
|
||||||
int vp= (v==LLs-1) ? 0 : v+1;
|
int vp= (v==LLs-1) ? 0 : v+1;
|
||||||
@ -195,14 +197,18 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||||
const FermionField &phi,
|
const FermionField &phi_i,
|
||||||
FermionField &chi,
|
FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower,
|
std::vector<Coeff_t> &lower,
|
||||||
std::vector<Coeff_t> &diag,
|
std::vector<Coeff_t> &diag,
|
||||||
std::vector<Coeff_t> &upper)
|
std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
GridBase *grid=psi.Grid();
|
chi_i.Checkerboard()=psi_i.Checkerboard();
|
||||||
|
GridBase *grid=psi_i.Grid();
|
||||||
|
auto psi=psi_i.View();
|
||||||
|
auto phi=phi_i.View();
|
||||||
|
auto chi=chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
int nsimd= Simd::Nsimd();
|
int nsimd= Simd::Nsimd();
|
||||||
@ -214,8 +220,6 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
|||||||
assert(Ls/LLs==nsimd);
|
assert(Ls/LLs==nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
scalar_type * u_p = (scalar_type *)&u[0];
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
@ -339,11 +343,13 @@ void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionField &chi,
|
void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i,
|
||||||
int LLs, int site,
|
int LLs, int site,
|
||||||
Vector<iSinglet<Simd> > &Matp,
|
Vector<iSinglet<Simd> > &Matp,
|
||||||
Vector<iSinglet<Simd> > &Matm)
|
Vector<iSinglet<Simd> > &Matm)
|
||||||
{
|
{
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
#ifndef AVX512
|
#ifndef AVX512
|
||||||
{
|
{
|
||||||
SiteHalfSpinor BcastP;
|
SiteHalfSpinor BcastP;
|
||||||
@ -513,11 +519,14 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
|
|||||||
|
|
||||||
// Z-mobius version
|
// Z-mobius version
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionField &chi,
|
void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i,
|
||||||
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
|
int LLs, int site, Vector<iSinglet<Simd> > &Matp, Vector<iSinglet<Simd> > &Matm)
|
||||||
{
|
{
|
||||||
#ifndef AVX512
|
#ifndef AVX512
|
||||||
{
|
{
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
SiteHalfSpinor BcastP;
|
SiteHalfSpinor BcastP;
|
||||||
SiteHalfSpinor BcastM;
|
SiteHalfSpinor BcastM;
|
||||||
SiteHalfSpinor SiteChiP;
|
SiteHalfSpinor SiteChiP;
|
||||||
@ -761,11 +770,12 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
{
|
{
|
||||||
|
chi.Checkerboard()=psi.Checkerboard();
|
||||||
|
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
int LLs = psi.Grid()->_rdimensions[0];
|
int LLs = psi.Grid()->_rdimensions[0];
|
||||||
int vol = psi.Grid()->oSites()/LLs;
|
int vol = psi.Grid()->oSites()/LLs;
|
||||||
|
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
|
|
||||||
Vector<iSinglet<Simd> > Matp;
|
Vector<iSinglet<Simd> > Matp;
|
||||||
Vector<iSinglet<Simd> > Matm;
|
Vector<iSinglet<Simd> > Matm;
|
||||||
|
@ -40,18 +40,20 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Pminus forwards
|
// Pminus forwards
|
||||||
// Pplus backwards..
|
// Pplus backwards..
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase* grid = psi.Grid();
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
this->M5Dtime -= usecond();
|
this->M5Dtime -= usecond();
|
||||||
|
|
||||||
thread_loop( (int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
|
thread_loop( (int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
|
||||||
for(int s=0; s<Ls; s++){
|
for(int s=0; s<Ls; s++){
|
||||||
auto tmp = psi[0];
|
auto tmp = psi[0];
|
||||||
@ -78,13 +80,17 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase* grid = psi.Grid();
|
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard()=psi.Checkerboard();
|
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -116,16 +122,16 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField& chi)
|
void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionField& chi_i)
|
||||||
{
|
{
|
||||||
GridBase* grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi=psi_i.View();
|
||||||
|
auto chi=chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
|
||||||
thread_loop((int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
|
thread_loop((int ss=0; ss<grid->oSites(); ss+=Ls),{ // adds Ls
|
||||||
|
|
||||||
auto tmp1 = psi[0];
|
auto tmp1 = psi[0];
|
||||||
@ -164,13 +170,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi, FermionField& chi)
|
void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i)
|
||||||
{
|
{
|
||||||
GridBase* grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
std::vector<Coeff_t> ueec(Ls);
|
std::vector<Coeff_t> ueec(Ls);
|
||||||
std::vector<Coeff_t> deec(Ls+1);
|
std::vector<Coeff_t> deec(Ls+1);
|
||||||
|
@ -52,10 +52,15 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi, FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||||
{
|
{
|
||||||
GridBase* grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
const int nsimd = Simd::Nsimd();
|
const int nsimd = Simd::Nsimd();
|
||||||
@ -67,8 +72,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
|||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
scalar_type* u_p = (scalar_type*) &u[0];
|
scalar_type* u_p = (scalar_type*) &u[0];
|
||||||
@ -197,10 +200,15 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionFiel
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||||
{
|
{
|
||||||
GridBase* grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
int nsimd = Simd::Nsimd();
|
int nsimd = Simd::Nsimd();
|
||||||
@ -212,8 +220,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
|||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
scalar_type* u_p = (scalar_type*) &u[0];
|
scalar_type* u_p = (scalar_type*) &u[0];
|
||||||
@ -342,9 +348,12 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionF
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
|
||||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||||
{
|
{
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
#ifndef AVX512
|
#ifndef AVX512
|
||||||
{
|
{
|
||||||
SiteHalfSpinor BcastP;
|
SiteHalfSpinor BcastP;
|
||||||
@ -532,12 +541,11 @@ void DomainWallEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fer
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||||
{
|
{
|
||||||
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = psi.Grid()->_rdimensions[0];
|
int LLs = psi.Grid()->_rdimensions[0];
|
||||||
int vol = psi.Grid()->oSites()/LLs;
|
int vol = psi.Grid()->oSites()/LLs;
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
Vector<iSinglet<Simd> > Matp;
|
Vector<iSinglet<Simd> > Matp;
|
||||||
Vector<iSinglet<Simd> > Matm;
|
Vector<iSinglet<Simd> > Matm;
|
||||||
Vector<iSinglet<Simd> > *_Matp;
|
Vector<iSinglet<Simd> > *_Matp;
|
||||||
|
@ -149,8 +149,8 @@ public:
|
|||||||
typedef typename Impl::Compressor Compressor; \
|
typedef typename Impl::Compressor Compressor; \
|
||||||
typedef typename Impl::StencilImpl StencilImpl; \
|
typedef typename Impl::StencilImpl StencilImpl; \
|
||||||
typedef typename Impl::ImplParams ImplParams; \
|
typedef typename Impl::ImplParams ImplParams; \
|
||||||
typedef typename Impl::Coeff_t Coeff_t; \
|
typedef typename Impl::Coeff_t Coeff_t;
|
||||||
|
|
||||||
#define INHERIT_IMPL_TYPES(Base) \
|
#define INHERIT_IMPL_TYPES(Base) \
|
||||||
INHERIT_GIMPL_TYPES(Base) \
|
INHERIT_GIMPL_TYPES(Base) \
|
||||||
INHERIT_FIMPL_TYPES(Base)
|
INHERIT_FIMPL_TYPES(Base)
|
||||||
@ -267,12 +267,14 @@ public:
|
|||||||
int Ls=Btilde.Grid()->_fdimensions[0];
|
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||||
GaugeLinkField tmp(mat.Grid());
|
GaugeLinkField tmp(mat.Grid());
|
||||||
tmp = Zero();
|
tmp = Zero();
|
||||||
|
auto tmp_v = tmp.View();
|
||||||
|
auto Btilde_v = Btilde.View();
|
||||||
|
auto Atilde_v = Atilde.View();
|
||||||
thread_loop( (int sss=0;sss<tmp.Grid()->oSites();sss++),{
|
thread_loop( (int sss=0;sss<tmp.Grid()->oSites();sss++),{
|
||||||
int sU=sss;
|
int sU=sss;
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
int sF = s+Ls*sU;
|
int sF = s+Ls*sU;
|
||||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
tmp_v[sU] = tmp_v[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||||
@ -499,13 +501,10 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
const int Nsimd =vector_type::Nsimd();
|
const int Nsimd =vector_type::Nsimd();
|
||||||
// const int Nsimd = grid->Nsimd();
|
|
||||||
|
|
||||||
GridBase *grid= St.Grid();
|
|
||||||
int direction = St._directions[mu];
|
int direction = St._directions[mu];
|
||||||
int distance = St._distances[mu];
|
int distance = St._distances[mu];
|
||||||
int ptype = St._permute_type[mu];
|
int ptype = St._permute_type[mu];
|
||||||
int sl = grid->_simd_layout[direction];
|
int sl = St._simd_layout[direction];
|
||||||
|
|
||||||
// Fixme X.Y.Z.T hardcode in stencil
|
// Fixme X.Y.Z.T hardcode in stencil
|
||||||
int mmu = mu % Nd;
|
int mmu = mu % Nd;
|
||||||
@ -524,7 +523,7 @@ public:
|
|||||||
extract(chi,vals);
|
extract(chi,vals);
|
||||||
for(int s=0;s<Nsimd;s++){
|
for(int s=0;s<Nsimd;s++){
|
||||||
|
|
||||||
grid->iCoorFromIindex(icoor,s);
|
St.iCoorFromIindex(icoor,s);
|
||||||
|
|
||||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||||
|
|
||||||
@ -592,9 +591,13 @@ public:
|
|||||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_loop( (auto ss=U.begin();ss<U.end();ss++),{
|
auto U_v = U.View();
|
||||||
Uds[ss](0)(mu) = U[ss]();
|
auto Uds_v = Uds.View();
|
||||||
Uds[ss](1)(mu) = Uconj[ss]();
|
auto Uconj_v = Uconj.View();
|
||||||
|
auto Utmp_v= Utmp.View();
|
||||||
|
thread_loop( (auto ss=U_v.begin();ss<U_v.end();ss++),{
|
||||||
|
Uds_v[ss](0)(mu) = U_v[ss]();
|
||||||
|
Uds_v[ss](1)(mu) = Uconj_v[ss]();
|
||||||
});
|
});
|
||||||
|
|
||||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||||
@ -605,9 +608,8 @@ public:
|
|||||||
Utmp = where(coor==0,Uconj,Utmp);
|
Utmp = where(coor==0,Uconj,Utmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
|
||||||
thread_loop((auto ss=U.begin();ss<U.end();ss++),{
|
Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
|
||||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
|
||||||
});
|
});
|
||||||
|
|
||||||
Utmp = Uconj;
|
Utmp = Uconj;
|
||||||
@ -615,8 +617,8 @@ public:
|
|||||||
Utmp = where(coor==0,U,Utmp);
|
Utmp = where(coor==0,U,Utmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
thread_loop((auto ss=U.begin();ss<U.end();ss++),{
|
thread_loop((auto ss=Utmp_v.begin();ss<Utmp_v.end();ss++),{
|
||||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
|
||||||
});
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -628,8 +630,10 @@ public:
|
|||||||
GaugeLinkField link(mat.Grid());
|
GaugeLinkField link(mat.Grid());
|
||||||
// use lorentz for flavour as hack.
|
// use lorentz for flavour as hack.
|
||||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||||
thread_loop((auto ss = tmp.begin(); ss < tmp.end(); ss++), {
|
auto link_v = link.View();
|
||||||
link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
|
auto tmp_v = tmp.View();
|
||||||
|
thread_loop((auto ss = tmp_v.begin(); ss < tmp_v.end(); ss++), {
|
||||||
|
link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1));
|
||||||
});
|
});
|
||||||
PokeIndex<LorentzIndex>(mat, link, mu);
|
PokeIndex<LorentzIndex>(mat, link, mu);
|
||||||
return;
|
return;
|
||||||
@ -641,11 +645,14 @@ public:
|
|||||||
|
|
||||||
GaugeLinkField tmp(mat.Grid());
|
GaugeLinkField tmp(mat.Grid());
|
||||||
tmp = Zero();
|
tmp = Zero();
|
||||||
|
auto tmp_v = tmp.View();
|
||||||
|
auto Atilde_v = Atilde.View();
|
||||||
|
auto Btilde_v = Btilde.View();
|
||||||
thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{
|
thread_loop((int ss = 0; ss < tmp.Grid()->oSites(); ss++) ,{
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
int sF = s + Ls * ss;
|
int sF = s + Ls * ss;
|
||||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
|
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
|
||||||
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||||
|
@ -249,8 +249,12 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
|
|||||||
////////////////////////
|
////////////////////////
|
||||||
// Call the single hop
|
// Call the single hop
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto UUU_v = UUU.View();
|
||||||
|
auto B_v = B.View();
|
||||||
|
auto Btilde_v = Btilde.View();
|
||||||
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
|
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
|
||||||
Kernels::DhopDirK(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1);
|
Kernels::DhopDirK(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Force in three link terms
|
// Force in three link terms
|
||||||
@ -360,9 +364,12 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
|
|||||||
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
|
auto UUUmu_v = UUUmu.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = out.View();
|
||||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
|
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
|
||||||
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp);
|
Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -377,13 +384,17 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
|
|||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
|
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto UUU_v = UUU.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = out.View();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
|
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
|
||||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
|
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++), {
|
||||||
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -171,12 +171,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
|
|||||||
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
Stencil.HaloExchange(in,compressor);
|
Stencil.HaloExchange(in,compressor);
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
|
auto UUUmu_v = UUUmu.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = in.View();
|
||||||
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
|
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
int sF = s+Ls*sU;
|
int sF = s+Ls*sU;
|
||||||
Kernels::DhopDirK(Stencil, Umu, UUUmu, Stencil.CommBuf(), sF, sU, in, out, dir, disp);
|
Kernels::DhopDirK(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
@ -230,24 +233,26 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
|||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
DhopTotalTime -= usecond();
|
DhopTotalTime -= usecond();
|
||||||
DhopCommTime -= usecond();
|
DhopCommTime -= usecond();
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
DhopCommTime += usecond();
|
DhopCommTime += usecond();
|
||||||
|
|
||||||
DhopComputeTime -= usecond();
|
DhopComputeTime -= usecond();
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto UUU_v = UUU.View();
|
||||||
|
auto out_v = out.View();
|
||||||
|
auto in_v = in.View();
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
|
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
|
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
|
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
DhopComputeTime += usecond();
|
||||||
|
@ -35,16 +35,18 @@ See the full license in the file "LICENSE" in the top level distribution directo
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase *grid = psi.Grid();
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -76,16 +78,20 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi, const FermionField &p
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||||
std::vector<Coeff_t> &shift_coeffs)
|
std::vector<Coeff_t> &shift_coeffs)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||||
GridBase *grid = psi.Grid();
|
|
||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -120,14 +126,17 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi, const FermionFi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
GridBase *grid = psi.Grid();
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -159,16 +168,19 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi, const FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi,
|
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i,
|
||||||
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
std::vector<Coeff_t> &lower, std::vector<Coeff_t> &diag, std::vector<Coeff_t> &upper,
|
||||||
std::vector<Coeff_t> &shift_coeffs)
|
std::vector<Coeff_t> &shift_coeffs)
|
||||||
{
|
{
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator
|
||||||
GridBase *grid = psi.Grid();
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
this->M5Dcalls++;
|
||||||
@ -204,14 +216,15 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &chi_i)
|
||||||
{
|
{
|
||||||
if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; }
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
GridBase *grid = psi.Grid();
|
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
@ -251,12 +264,14 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi, FermionField &ch
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionField &chi)
|
void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i)
|
||||||
{
|
{
|
||||||
GridBase *grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
@ -306,14 +321,15 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi, FermionFie
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField &chi)
|
void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i)
|
||||||
{
|
{
|
||||||
if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; }
|
if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; }
|
||||||
|
|
||||||
GridBase *grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
auto psi = psi_i.View();
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
auto chi = chi_i.View();
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
@ -353,12 +369,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi, FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi, FermionField &chi)
|
void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i)
|
||||||
{
|
{
|
||||||
GridBase *grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase *grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
this->MooeeInvCalls++;
|
||||||
this->MooeeInvTime -= usecond();
|
this->MooeeInvTime -= usecond();
|
||||||
|
@ -63,10 +63,14 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField& psi, Fermion
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& phi,
|
void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||||
{
|
{
|
||||||
GridBase* grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
const int nsimd = Simd::Nsimd();
|
const int nsimd = Simd::Nsimd();
|
||||||
@ -78,8 +82,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
|
|||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
scalar_type* u_p = (scalar_type*) &u[0];
|
scalar_type* u_p = (scalar_type*) &u[0];
|
||||||
@ -208,11 +210,14 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField& psi, const FermionField& p
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionField& phi,
|
void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi_i, const FermionField& phi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
FermionField& chi_i, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||||
std::vector<Coeff_t>& shift_coeffs)
|
std::vector<Coeff_t>& shift_coeffs)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
|
auto & psi = psi_i;
|
||||||
|
auto & phi = phi_i;
|
||||||
|
auto & chi = chi_i;
|
||||||
|
|
||||||
this->M5D(psi, phi, chi, lower, diag, upper);
|
this->M5D(psi, phi, chi, lower, diag, upper);
|
||||||
|
|
||||||
@ -225,8 +230,11 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
GridBase* grid = psi.Grid();
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
const int nsimd = Simd::Nsimd();
|
const int nsimd = Simd::Nsimd();
|
||||||
@ -239,7 +247,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
|||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -389,10 +396,14 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField& psi, const FermionFi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField& phi,
|
void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper)
|
||||||
{
|
{
|
||||||
GridBase* grid = psi.Grid();
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
int nsimd = Simd::Nsimd();
|
int nsimd = Simd::Nsimd();
|
||||||
@ -404,7 +415,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
|
|||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -531,12 +541,14 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField& psi, const FermionField
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const FermionField& phi,
|
void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i,
|
||||||
FermionField& chi, std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
std::vector<Coeff_t>& lower, std::vector<Coeff_t>& diag, std::vector<Coeff_t>& upper,
|
||||||
std::vector<Coeff_t>& shift_coeffs)
|
std::vector<Coeff_t>& shift_coeffs)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
|
auto & psi = psi_i;
|
||||||
|
auto & phi = phi_i;
|
||||||
|
auto & chi = chi_i;
|
||||||
this->M5Ddag(psi, phi, chi, lower, diag, upper);
|
this->M5Ddag(psi, phi, chi, lower, diag, upper);
|
||||||
|
|
||||||
// FIXME: possible gain from vectorizing shift operation as well?
|
// FIXME: possible gain from vectorizing shift operation as well?
|
||||||
@ -548,8 +560,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
chi_i.Checkerboard() = psi_i.Checkerboard();
|
||||||
GridBase* grid = psi.Grid();
|
GridBase* grid = psi_i.Grid();
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto phi = phi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
int nsimd = Simd::Nsimd();
|
int nsimd = Simd::Nsimd();
|
||||||
@ -562,7 +577,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
|||||||
assert(Ls/LLs == nsimd);
|
assert(Ls/LLs == nsimd);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
@ -717,9 +731,11 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField& psi, const Fermio
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionField& chi,
|
void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi_i, FermionField& chi_i,
|
||||||
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
int LLs, int site, Vector<iSinglet<Simd> >& Matp, Vector<iSinglet<Simd> >& Matm)
|
||||||
{
|
{
|
||||||
|
auto psi = psi_i.View();
|
||||||
|
auto chi = chi_i.View();
|
||||||
#ifndef AVX512
|
#ifndef AVX512
|
||||||
{
|
{
|
||||||
SiteHalfSpinor BcastP;
|
SiteHalfSpinor BcastP;
|
||||||
@ -909,12 +925,12 @@ void MobiusEOFAFermion<Impl>::MooeeInternalZAsm(const FermionField& psi, Fermion
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv)
|
||||||
{
|
{
|
||||||
|
chi.Checkerboard() = psi.Checkerboard();
|
||||||
|
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = psi.Grid()->_rdimensions[0];
|
int LLs = psi.Grid()->_rdimensions[0];
|
||||||
int vol = psi.Grid()->oSites()/LLs;
|
int vol = psi.Grid()->oSites()/LLs;
|
||||||
|
|
||||||
chi.Checkerboard() = psi.Checkerboard();
|
|
||||||
|
|
||||||
Vector<iSinglet<Simd>> Matp;
|
Vector<iSinglet<Simd>> Matp;
|
||||||
Vector<iSinglet<Simd>> Matm;
|
Vector<iSinglet<Simd>> Matm;
|
||||||
Vector<iSinglet<Simd>>* _Matp;
|
Vector<iSinglet<Simd>>* _Matp;
|
||||||
|
@ -40,9 +40,9 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteSpinor *buf, int sF,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
|
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) {
|
||||||
const SiteSpinor *chi_p;
|
const SiteSpinor *chi_p;
|
||||||
SiteSpinor chi;
|
SiteSpinor chi;
|
||||||
SiteSpinor Uchi;
|
SiteSpinor Uchi;
|
||||||
@ -183,9 +183,9 @@ void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, D
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int LLs, int sU,
|
||||||
const FermionField &in, FermionField &out) {
|
const FermionFieldView &in, FermionFieldView &out) {
|
||||||
SiteSpinor naik;
|
SiteSpinor naik;
|
||||||
SiteSpinor naive;
|
SiteSpinor naive;
|
||||||
int oneLink =0;
|
int oneLink =0;
|
||||||
@ -221,9 +221,9 @@ void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
int oneLink =0;
|
int oneLink =0;
|
||||||
int threeLink=1;
|
int threeLink=1;
|
||||||
@ -258,8 +258,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF,
|
void StaggeredKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out, int dir, int disp)
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
|
||||||
{
|
{
|
||||||
// Disp should be either +1,-1,+3,-3
|
// Disp should be either +1,-1,+3,-3
|
||||||
// What about "dag" ?
|
// What about "dag" ?
|
||||||
|
@ -46,30 +46,34 @@ public:
|
|||||||
|
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
typedef FermionOperator<Impl> Base;
|
typedef FermionOperator<Impl> Base;
|
||||||
|
|
||||||
|
typedef typename ViewMap<FermionField>::Type FermionFieldView;
|
||||||
|
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void DhopDirK(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
||||||
|
|
||||||
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
|
int sF, int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink);
|
||||||
|
|
||||||
|
|
||||||
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
|
int sF, int sU, const FermionFieldView &in, SiteSpinor&out,int threeLink);
|
||||||
|
|
||||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
|
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,SiteSpinor * buf,
|
||||||
int LLs, int sU, const FermionField &in, FermionField &out, int dag);
|
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
|
||||||
|
|
||||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
|
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
|
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf,
|
||||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
int LLs, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
@ -580,10 +580,10 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeField &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
};
|
};
|
||||||
@ -644,10 +644,10 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
// This is the single precision 5th direction vectorised kernel
|
// This is the single precision 5th direction vectorised kernel
|
||||||
#include <simd/Intel512single.h>
|
#include <simd/Intel512single.h>
|
||||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeField &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
@ -694,10 +694,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
|||||||
|
|
||||||
#include <simd/Intel512double.h>
|
#include <simd/Intel512double.h>
|
||||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeField &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
@ -775,10 +775,10 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
|||||||
|
|
||||||
#include <simd/Intel512single.h>
|
#include <simd/Intel512single.h>
|
||||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeField &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
@ -840,10 +840,10 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
|||||||
|
|
||||||
#include <simd/Intel512double.h>
|
#include <simd/Intel512double.h>
|
||||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
DoubledGaugeField &UUU,
|
DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||||
@ -905,10 +905,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
|||||||
|
|
||||||
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
|
#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \
|
||||||
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
|
template void CLASS<IMPL>::FUNC(StencilImpl &st, LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
DoubledGaugeField &UUU, \
|
DoubledGaugeFieldView &UUU, \
|
||||||
SiteSpinor *buf, int LLs, \
|
SiteSpinor *buf, int LLs, \
|
||||||
int sU, const FermionField &in, FermionField &out);
|
int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
|
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
|
||||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
|
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
|
||||||
|
@ -89,9 +89,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
|
||||||
SiteSpinor *buf, int LLs,
|
SiteSpinor *buf, int LLs,
|
||||||
int sU, const FermionField &in, FermionField &out, int dag)
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dag)
|
||||||
{
|
{
|
||||||
SiteSpinor naik;
|
SiteSpinor naik;
|
||||||
SiteSpinor naive;
|
SiteSpinor naive;
|
||||||
@ -110,9 +110,9 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, Do
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteSpinor *buf, int sF,
|
SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
|
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -298,14 +298,14 @@ void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &l
|
|||||||
|
|
||||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
||||||
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \
|
||||||
SiteSpinor *buf, int LLs, \
|
SiteSpinor *buf, int LLs, \
|
||||||
int sU, const FermionField &in, FermionField &out, int dag);
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dag);
|
||||||
|
|
||||||
#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \
|
#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \
|
||||||
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \
|
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, \
|
||||||
SiteSpinor *buf, int sF, \
|
SiteSpinor *buf, int sF, \
|
||||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) ;
|
int sU, const FermionFieldView &in, SiteSpinor &out,int threeLink) ;
|
||||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
|
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
|
||||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
|
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
|
||||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
|
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
|
||||||
|
@ -216,8 +216,11 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
////////////////////////
|
////////////////////////
|
||||||
// Call the single hop
|
// Call the single hop
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto B_v = B.View();
|
||||||
|
auto Btilde_v = Btilde.View();
|
||||||
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{
|
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++) ,{
|
||||||
Kernels::DhopDirK(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma);
|
Kernels::DhopDirK(st, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma);
|
||||||
});
|
});
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
@ -316,9 +319,11 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
|||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = in.View();
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{
|
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) ,{
|
||||||
Kernels::DhopDirK(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
|
Kernels::DhopDirK(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -333,13 +338,16 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
|
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v= out.View();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
accelerator_loop( sss,in, {
|
accelerator_loop( sss,in_v, {
|
||||||
Kernels::DhopSiteDag(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
Kernels::DhopSiteDag(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
accelerator_loop( sss,in, {
|
accelerator_loop( sss,in_v, {
|
||||||
Kernels::DhopSite(Opt,st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
Kernels::DhopSite(Opt,st, lo, U_v, st.CommBuf(), sss, sss, 1, 1, in_v, out_v);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -367,15 +375,21 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|||||||
// Inefficient comms method but not performance critical.
|
// Inefficient comms method but not performance critical.
|
||||||
tmp1 = Cshift(q_in_1, mu, 1);
|
tmp1 = Cshift(q_in_1, mu, 1);
|
||||||
tmp2 = Cshift(q_in_2, mu, 1);
|
tmp2 = Cshift(q_in_2, mu, 1);
|
||||||
|
auto tmp1_v = tmp1.View();
|
||||||
|
auto tmp2_v = tmp2.View();
|
||||||
|
auto q_in_1_v=q_in_1.View();
|
||||||
|
auto q_in_2_v=q_in_2.View();
|
||||||
|
auto q_out_v = q_out.View();
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
||||||
Kernels::ContractConservedCurrentSiteFwd(tmp1[sU],
|
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU],
|
||||||
q_in_2[sU],
|
q_in_2_v[sU],
|
||||||
q_out[sU],
|
q_out_v[sU],
|
||||||
Umu, sU, mu);
|
Umu_v, sU, mu);
|
||||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sU],
|
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU],
|
||||||
tmp2[sU],
|
tmp2_v[sU],
|
||||||
q_out[sU],
|
q_out_v[sU],
|
||||||
Umu, sU, mu);
|
Umu_v, sU, mu);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,34 +429,40 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
tmp = ph*q_in;
|
tmp = ph*q_in;
|
||||||
tmpBwd = Cshift(tmp, mu, -1);
|
tmpBwd = Cshift(tmp, mu, -1);
|
||||||
|
|
||||||
|
auto coords_v = coords.View();
|
||||||
|
auto tmpFwd_v = tmpFwd.View();
|
||||||
|
auto tmpBwd_v = tmpBwd.View();
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
|
auto q_out_v = q_out.View();
|
||||||
|
|
||||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
||||||
|
|
||||||
// Compute the sequential conserved current insertion only if our simd
|
// Compute the sequential conserved current insertion only if our simd
|
||||||
// object contains a timeslice we need.
|
// object contains a timeslice we need.
|
||||||
vInteger t_mask = ((coords[sU] >= tmin) &&
|
vInteger t_mask = ((coords_v[sU] >= tmin) &&
|
||||||
(coords[sU] <= tmax));
|
(coords_v[sU] <= tmax));
|
||||||
Integer timeSlices = Reduce(t_mask);
|
Integer timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
if (timeSlices > 0) {
|
if (timeSlices > 0) {
|
||||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sU],
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU],
|
||||||
q_out[sU],
|
q_out_v[sU],
|
||||||
Umu, sU, mu, t_mask);
|
Umu_v, sU, mu, t_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repeat for backward direction.
|
// Repeat for backward direction.
|
||||||
t_mask = ((coords[sU] >= (tmin + tshift)) &&
|
t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
|
||||||
(coords[sU] <= (tmax + tshift)));
|
(coords_v[sU] <= (tmax + tshift)));
|
||||||
|
|
||||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||||
unsigned int t0 = 0;
|
unsigned int t0 = 0;
|
||||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
|
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
|
||||||
|
|
||||||
timeSlices = Reduce(t_mask);
|
timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
if (timeSlices > 0) {
|
if (timeSlices > 0) {
|
||||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sU],
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU],
|
||||||
q_out[sU],
|
q_out_v[sU],
|
||||||
Umu, sU, mu, t_mask);
|
Umu_v, sU, mu, t_mask);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -244,11 +244,14 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
|
|||||||
assert(dirdisp<=7);
|
assert(dirdisp<=7);
|
||||||
assert(dirdisp>=0);
|
assert(dirdisp>=0);
|
||||||
|
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = out.View();
|
||||||
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
|
thread_loop( (int ss=0;ss<Umu.Grid()->oSites();ss++),{
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
int sF = s+Ls*sU;
|
int sF = s+Ls*sU;
|
||||||
Kernels::DhopDirK(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
|
Kernels::DhopDirK(Stencil,Umu_v,Stencil.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
@ -279,7 +282,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
Atilde=A;
|
Atilde=A;
|
||||||
int LLs = B.Grid()->_rdimensions[0];
|
int LLs = B.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
|
|
||||||
DerivComputeTime-=usecond();
|
DerivComputeTime-=usecond();
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -293,15 +295,20 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
DerivDhopComputeTime -= usecond();
|
DerivDhopComputeTime -= usecond();
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto Btilde_v = Btilde.View();
|
||||||
|
auto B_v = B.View();
|
||||||
|
int Bsites = B.Grid()->oSites();
|
||||||
|
int Usites = U.Grid()->oSites();
|
||||||
thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{
|
thread_loop( (int sss = 0; sss < U.Grid()->oSites(); sss++) ,{
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
int sU = sss;
|
int sU = sss;
|
||||||
int sF = s + Ls * sU;
|
int sF = s + Ls * sU;
|
||||||
|
|
||||||
assert(sF < B.Grid()->oSites());
|
assert(sF < Bsites);
|
||||||
assert(sU < U.Grid()->oSites());
|
assert(sU < Usites);
|
||||||
|
|
||||||
Kernels::DhopDirK(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
|
Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma);
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
@ -406,6 +413,9 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = out.View();
|
||||||
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
||||||
{
|
{
|
||||||
int tid = omp_get_thread_num();
|
int tid = omp_get_thread_num();
|
||||||
@ -435,13 +445,13 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
|||||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
|
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0);
|
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,1,0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ptime = usecond() - start;
|
ptime = usecond() - start;
|
||||||
@ -470,14 +480,14 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
|||||||
thread_loop( (int ss = 0; ss < sz; ss++) ,{
|
thread_loop( (int ss = 0; ss < sz; ss++) ,{
|
||||||
int sU = st.surface_list[ss];
|
int sU = st.surface_list[ss];
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
int sz=st.surface_list.size();
|
int sz=st.surface_list.size();
|
||||||
thread_loop( (int ss = 0; ss < sz; ss++) ,{
|
thread_loop( (int ss = 0; ss < sz; ss++) ,{
|
||||||
int sU = st.surface_list[ss];
|
int sU = st.surface_list[ss];
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v,0,1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
DhopComputeTime2+=usecond();
|
DhopComputeTime2+=usecond();
|
||||||
@ -505,17 +515,20 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
|||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
|
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = out.View();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
accelerator_loop( ss, U, {
|
accelerator_loop( ss, U_v, {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DhopSiteDag(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
Kernels::DhopSiteDag(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
accelerator_loop( ss, U , {
|
accelerator_loop( ss, U_v , {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
int sF = LLs * sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DhopSite(Opt,st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
Kernels::DhopSite(Opt,st,lo,U_v,st.CommBuf(),sF,sU,LLs,1,in_v,out_v);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
DhopComputeTime+=usecond();
|
||||||
@ -738,10 +751,17 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|||||||
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
|
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
|
||||||
q_out = Zero();
|
q_out = Zero();
|
||||||
|
|
||||||
|
auto q_in_1_v = q_in_1.View();
|
||||||
|
auto q_in_2_v = q_in_2.View();
|
||||||
|
auto tmp1_v = tmp1.View();
|
||||||
|
auto tmp2_v = tmp2.View();
|
||||||
|
auto q_out_v = q_out.View();
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
|
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
|
||||||
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
|
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
|
||||||
tmp1 = Cshift(q_in_1, mu + 1, 1);
|
tmp1 = Cshift(q_in_1, mu + 1, 1);
|
||||||
tmp2 = Cshift(q_in_2, mu + 1, 1);
|
tmp2 = Cshift(q_in_2, mu + 1, 1);
|
||||||
|
|
||||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU), {
|
||||||
unsigned int sF1 = sU * LLs;
|
unsigned int sF1 = sU * LLs;
|
||||||
unsigned int sF2 = (sU + 1) * LLs - 1;
|
unsigned int sF2 = (sU + 1) * LLs - 1;
|
||||||
@ -755,20 +775,20 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
|
|||||||
// If vectorised in 5th dimension, reverse q2 vector to match up
|
// If vectorised in 5th dimension, reverse q2 vector to match up
|
||||||
// sites correctly.
|
// sites correctly.
|
||||||
if (Impl::LsVectorised) {
|
if (Impl::LsVectorised) {
|
||||||
REVERSE_LS(q_in_2[sF2], qSite2, Ls / LLs);
|
REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs);
|
||||||
REVERSE_LS(tmp2[sF2], qmuSite2, Ls / LLs);
|
REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs);
|
||||||
} else {
|
} else {
|
||||||
qSite2 = q_in_2[sF2];
|
qSite2 = q_in_2_v[sF2];
|
||||||
qmuSite2 = tmp2[sF2];
|
qmuSite2 = tmp2_v[sF2];
|
||||||
}
|
}
|
||||||
Kernels::ContractConservedCurrentSiteFwd(tmp1[sF1],
|
Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1],
|
||||||
qSite2,
|
qSite2,
|
||||||
q_out[sU],
|
q_out_v[sU],
|
||||||
Umu, sU, mu, axial_sign);
|
Umu_v, sU, mu, axial_sign);
|
||||||
Kernels::ContractConservedCurrentSiteBwd(q_in_1[sF1],
|
Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1],
|
||||||
qmuSite2,
|
qmuSite2,
|
||||||
q_out[sU],
|
q_out_v[sU],
|
||||||
Umu, sU, mu, axial_sign);
|
Umu_v, sU, mu, axial_sign);
|
||||||
sF1++;
|
sF1++;
|
||||||
sF2--;
|
sF2--;
|
||||||
}
|
}
|
||||||
@ -808,7 +828,7 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
q_out = Zero();
|
q_out = Zero();
|
||||||
LatticeInteger coords(_FourDimGrid);
|
LatticeInteger coords(_FourDimGrid);
|
||||||
LatticeCoordinate(coords, Tp);
|
LatticeCoordinate(coords, Tp);
|
||||||
|
auto coords_v = coords.View();
|
||||||
// Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu
|
// Need q(x + mu, s) and q(x - mu, s). 5D lattice so shift 4D coordinate mu
|
||||||
// by one.
|
// by one.
|
||||||
tmp = Cshift(q_in, mu + 1, 1);
|
tmp = Cshift(q_in, mu + 1, 1);
|
||||||
@ -816,11 +836,15 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
tmp = ph*q_in;
|
tmp = ph*q_in;
|
||||||
tmpBwd = Cshift(tmp, mu + 1, -1);
|
tmpBwd = Cshift(tmp, mu + 1, -1);
|
||||||
|
|
||||||
|
auto tmpBwd_v = tmpBwd.View();
|
||||||
|
auto tmpFwd_v = tmpFwd.View();
|
||||||
|
auto q_out_v = q_out.View();
|
||||||
|
auto Umu_v = Umu.View();
|
||||||
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{
|
thread_loop( (unsigned int sU = 0; sU < Umu.Grid()->oSites(); ++sU) ,{
|
||||||
// Compute the sequential conserved current insertion only if our simd
|
// Compute the sequential conserved current insertion only if our simd
|
||||||
// object contains a timeslice we need.
|
// object contains a timeslice we need.
|
||||||
vInteger t_mask = ((coords[sU] >= tmin) &&
|
vInteger t_mask = ((coords_v[sU] >= tmin) &&
|
||||||
(coords[sU] <= tmax));
|
(coords_v[sU] <= tmax));
|
||||||
Integer timeSlices = Reduce(t_mask);
|
Integer timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
if (timeSlices > 0) {
|
if (timeSlices > 0) {
|
||||||
@ -828,20 +852,20 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
unsigned int sF = sU * LLs;
|
unsigned int sF = sU * LLs;
|
||||||
for (unsigned int s = 0; s < LLs; ++s) {
|
for (unsigned int s = 0; s < LLs; ++s) {
|
||||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||||
Kernels::SeqConservedCurrentSiteFwd(tmpFwd[sF],
|
Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sF],
|
||||||
q_out[sF], Umu, sU,
|
q_out_v[sF], Umu_v, sU,
|
||||||
mu, t_mask, axial_sign);
|
mu, t_mask, axial_sign);
|
||||||
++sF;
|
++sF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Repeat for backward direction.
|
// Repeat for backward direction.
|
||||||
t_mask = ((coords[sU] >= (tmin + tshift)) &&
|
t_mask = ((coords_v[sU] >= (tmin + tshift)) &&
|
||||||
(coords[sU] <= (tmax + tshift)));
|
(coords_v[sU] <= (tmax + tshift)));
|
||||||
|
|
||||||
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
//if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3)
|
||||||
unsigned int t0 = 0;
|
unsigned int t0 = 0;
|
||||||
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords[sU] == t0 ));
|
if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords_v[sU] == t0 ));
|
||||||
|
|
||||||
timeSlices = Reduce(t_mask);
|
timeSlices = Reduce(t_mask);
|
||||||
|
|
||||||
@ -849,8 +873,8 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
unsigned int sF = sU * LLs;
|
unsigned int sF = sU * LLs;
|
||||||
for (unsigned int s = 0; s < LLs; ++s) {
|
for (unsigned int s = 0; s < LLs; ++s) {
|
||||||
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2)));
|
||||||
Kernels::SeqConservedCurrentSiteBwd(tmpBwd[sF],
|
Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sF],
|
||||||
q_out[sF], Umu, sU,
|
q_out_v[sF], Umu_v, sU,
|
||||||
mu, t_mask, axial_sign);
|
mu, t_mask, axial_sign);
|
||||||
++sF;
|
++sF;
|
||||||
}
|
}
|
||||||
|
@ -36,7 +36,7 @@ int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
|
|||||||
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
|
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Generic implementation; move to different file?
|
// Generic implementation; move to different file?
|
||||||
@ -103,9 +103,9 @@ accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
|||||||
// All legs kernels ; comms then compute
|
// All legs kernels ; comms then compute
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteHalfSpinor *buf, int sF,
|
SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor chi;
|
||||||
@ -127,9 +127,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, Lebesg
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteHalfSpinor *buf, int sF,
|
SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor chi;
|
||||||
@ -153,9 +153,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueO
|
|||||||
// Interior kernels
|
// Interior kernels
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteHalfSpinor *buf, int sF,
|
SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor chi;
|
||||||
@ -178,9 +178,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, Leb
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteHalfSpinor *buf, int sF,
|
SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor chi;
|
||||||
@ -204,9 +204,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, Lebesg
|
|||||||
// Exterior kernels
|
// Exterior kernels
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteHalfSpinor *buf, int sF,
|
SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
// SiteHalfSpinor tmp;
|
// SiteHalfSpinor tmp;
|
||||||
// SiteHalfSpinor chi;
|
// SiteHalfSpinor chi;
|
||||||
@ -231,9 +231,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, Leb
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,
|
||||||
SiteHalfSpinor *buf, int sF,
|
SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out)
|
int sU, const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
// SiteHalfSpinor tmp;
|
// SiteHalfSpinor tmp;
|
||||||
// SiteHalfSpinor chi;
|
// SiteHalfSpinor chi;
|
||||||
@ -258,9 +258,9 @@ accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, Lebesg
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
|
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
|
||||||
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
|
||||||
|
{
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor chi;
|
SiteHalfSpinor chi;
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
@ -300,23 +300,23 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd(
|
|||||||
const SitePropagator &q_in_1,
|
const SitePropagator &q_in_1,
|
||||||
const SitePropagator &q_in_2,
|
const SitePropagator &q_in_2,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
bool switch_sign)
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
SitePropagator result, tmp;
|
SitePropagator result, tmp;
|
||||||
Gamma g5(Gamma::Algebra::Gamma5);
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
|
|
||||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
Impl::multLinkProp(tmp, U[sU], q_in_1, mu);
|
||||||
if (switch_sign)
|
|
||||||
{
|
result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu);
|
||||||
q_out -= result;
|
|
||||||
}
|
if (switch_sign) {
|
||||||
else
|
q_out -= result;
|
||||||
{
|
} else {
|
||||||
q_out += result;
|
q_out += result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
@ -330,23 +330,22 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd(
|
|||||||
const SitePropagator &q_in_1,
|
const SitePropagator &q_in_1,
|
||||||
const SitePropagator &q_in_2,
|
const SitePropagator &q_in_2,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
bool switch_sign)
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
SitePropagator result, tmp;
|
SitePropagator result, tmp;
|
||||||
Gamma g5(Gamma::Algebra::Gamma5);
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
|
|
||||||
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
Impl::multLinkProp(tmp, U[sU], q_in_1, mu + Nd);
|
||||||
if (switch_sign)
|
|
||||||
{
|
result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu);
|
||||||
q_out += result;
|
if (switch_sign) {
|
||||||
}
|
q_out += result;
|
||||||
else
|
} else {
|
||||||
{
|
q_out -= result;
|
||||||
q_out -= result;
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// G-parity requires more specialised implementation.
|
// G-parity requires more specialised implementation.
|
||||||
@ -356,7 +355,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteFwd( \
|
|||||||
const SitePropagator &q_in_1, \
|
const SitePropagator &q_in_1, \
|
||||||
const SitePropagator &q_in_2, \
|
const SitePropagator &q_in_2, \
|
||||||
SitePropagator &q_out, \
|
SitePropagator &q_out, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
unsigned int sU, \
|
unsigned int sU, \
|
||||||
unsigned int mu, \
|
unsigned int mu, \
|
||||||
bool switch_sign) \
|
bool switch_sign) \
|
||||||
@ -368,7 +367,7 @@ void WilsonKernels<Impl>::ContractConservedCurrentSiteBwd( \
|
|||||||
const SitePropagator &q_in_1, \
|
const SitePropagator &q_in_1, \
|
||||||
const SitePropagator &q_in_2, \
|
const SitePropagator &q_in_2, \
|
||||||
SitePropagator &q_out, \
|
SitePropagator &q_out, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
unsigned int mu, \
|
unsigned int mu, \
|
||||||
unsigned int sU, \
|
unsigned int sU, \
|
||||||
bool switch_sign) \
|
bool switch_sign) \
|
||||||
@ -391,27 +390,25 @@ NO_CURR_SITE(GparityWilsonImplDF);
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
vInteger t_mask,
|
vInteger t_mask,
|
||||||
bool switch_sign)
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
SitePropagator result;
|
SitePropagator result;
|
||||||
Impl::multLinkProp(result, U[sU], q_in, mu);
|
|
||||||
result = WilsonCurrentFwd(result, mu);
|
Impl::multLinkProp(result, U[sU], q_in, mu);
|
||||||
|
result = WilsonCurrentFwd(result, mu);
|
||||||
|
|
||||||
// Zero any unwanted timeslice entries.
|
// Zero any unwanted timeslice entries.
|
||||||
result = predicatedWhere(t_mask, result, 0.*result);
|
result = predicatedWhere(t_mask, result, 0.*result);
|
||||||
|
|
||||||
if (switch_sign)
|
if (switch_sign) {
|
||||||
{
|
q_out -= result;
|
||||||
q_out -= result;
|
} else {
|
||||||
}
|
q_out += result;
|
||||||
else
|
}
|
||||||
{
|
|
||||||
q_out += result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
@ -423,27 +420,24 @@ void WilsonKernels<Impl>::SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
void WilsonKernels<Impl>::SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
vInteger t_mask,
|
vInteger t_mask,
|
||||||
bool switch_sign)
|
bool switch_sign)
|
||||||
{
|
{
|
||||||
SitePropagator result;
|
SitePropagator result;
|
||||||
Impl::multLinkProp(result, U[sU], q_in, mu + Nd);
|
Impl::multLinkProp(result, U[sU], q_in, mu + Nd);
|
||||||
result = WilsonCurrentBwd(result, mu);
|
result = WilsonCurrentBwd(result, mu);
|
||||||
|
|
||||||
// Zero any unwanted timeslice entries.
|
// Zero any unwanted timeslice entries.
|
||||||
result = predicatedWhere(t_mask, result, 0.*result);
|
result = predicatedWhere(t_mask, result, 0.*result);
|
||||||
|
|
||||||
if (switch_sign)
|
if (switch_sign) {
|
||||||
{
|
q_out += result;
|
||||||
q_out += result;
|
} else {
|
||||||
}
|
q_out -= result;
|
||||||
else
|
}
|
||||||
{
|
|
||||||
q_out -= result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonKernels);
|
FermOpTemplateInstantiate(WilsonKernels);
|
||||||
|
@ -50,13 +50,16 @@ public:
|
|||||||
|
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
typedef FermionOperator<Impl> Base;
|
typedef FermionOperator<Impl> Base;
|
||||||
|
|
||||||
|
typedef typename ViewMap<FermionField>::Type FermionFieldView;
|
||||||
|
typedef typename ViewMap<DoubledGaugeField>::Type DoubledGaugeFieldView;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
template <bool EnableBool = true> accelerator
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||||
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
DhopSite(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
||||||
{
|
{
|
||||||
bgq_l1p_optimisation(1);
|
bgq_l1p_optimisation(1);
|
||||||
switch(Opt) {
|
switch(Opt) {
|
||||||
@ -99,8 +102,8 @@ public:
|
|||||||
|
|
||||||
template <bool EnableBool = true> accelerator
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||||
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
DhopSite(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1 ) {
|
||||||
// no kernel choice
|
// no kernel choice
|
||||||
for (int site = 0; site < Nsite; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
@ -116,8 +119,8 @@ public:
|
|||||||
|
|
||||||
template <bool EnableBool = true> accelerator
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
||||||
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
DhopSiteDag(int Opt, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1)
|
||||||
{
|
{
|
||||||
bgq_l1p_optimisation(1);
|
bgq_l1p_optimisation(1);
|
||||||
switch(Opt) {
|
switch(Opt) {
|
||||||
@ -161,8 +164,8 @@ public:
|
|||||||
|
|
||||||
template <bool EnableBool = true> accelerator
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
||||||
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
DhopSiteDag(int Opt,StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out,int interior=1,int exterior=1) {
|
||||||
|
|
||||||
for (int site = 0; site < Nsite; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
@ -176,8 +179,8 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
accelerator void DhopDirK(StencilImpl &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Utilities for inserting Wilson conserved current.
|
// Utilities for inserting Wilson conserved current.
|
||||||
@ -185,27 +188,27 @@ public:
|
|||||||
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1,
|
||||||
const SitePropagator &q_in_2,
|
const SitePropagator &q_in_2,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
bool switch_sign = false);
|
bool switch_sign = false);
|
||||||
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
|
void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1,
|
||||||
const SitePropagator &q_in_2,
|
const SitePropagator &q_in_2,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
bool switch_sign = false);
|
bool switch_sign = false);
|
||||||
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
void SeqConservedCurrentSiteFwd(const SitePropagator &q_in,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
vInteger t_mask,
|
vInteger t_mask,
|
||||||
bool switch_sign = false);
|
bool switch_sign = false);
|
||||||
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
void SeqConservedCurrentSiteBwd(const SitePropagator &q_in,
|
||||||
SitePropagator &q_out,
|
SitePropagator &q_out,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeFieldView &U,
|
||||||
unsigned int sU,
|
unsigned int sU,
|
||||||
unsigned int mu,
|
unsigned int mu,
|
||||||
vInteger t_mask,
|
vInteger t_mask,
|
||||||
@ -213,60 +216,60 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Specialised variants
|
// Specialised variants
|
||||||
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
|
|
||||||
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
@ -38,43 +38,43 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Default to no assembler implementation
|
// Default to no assembler implementation
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
@ -83,21 +83,21 @@ WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl
|
|||||||
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
|
#include <qcd/action/fermion/WilsonKernelsAsmQPX.h>
|
||||||
|
|
||||||
#define INSTANTIATE_ASM(A)\
|
#define INSTANTIATE_ASM(A)\
|
||||||
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
\
|
\
|
||||||
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
\
|
\
|
||||||
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
\
|
\
|
||||||
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeFieldView &U, SiteHalfSpinor *buf,\
|
||||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
|
|
||||||
INSTANTIATE_ASM(WilsonImplF);
|
INSTANTIATE_ASM(WilsonImplF);
|
||||||
INSTANTIATE_ASM(WilsonImplD);
|
INSTANTIATE_ASM(WilsonImplD);
|
||||||
|
@ -573,8 +573,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
@ -600,8 +600,8 @@ WilsonKernels<Impl>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -626,8 +626,8 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
@ -654,8 +654,8 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -681,8 +681,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
@ -711,8 +711,8 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -746,58 +746,58 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
|
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st, \
|
||||||
LebesgueOrder &lo, \
|
LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
SiteHalfSpinor *buf, \
|
SiteHalfSpinor *buf, \
|
||||||
int sF,int sU, \
|
int sF,int sU, \
|
||||||
const FermionField &in, \
|
const FermionFieldView &in, \
|
||||||
FermionField &out){ assert(0); } \
|
FermionFieldView &out){ assert(0); } \
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
|
WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st, \
|
||||||
LebesgueOrder &lo, \
|
LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
SiteHalfSpinor *buf, \
|
SiteHalfSpinor *buf, \
|
||||||
int sF,int sU, \
|
int sF,int sU, \
|
||||||
const FermionField &in, \
|
const FermionFieldView &in, \
|
||||||
FermionField &out){ assert(0); } \
|
FermionFieldView &out){ assert(0); } \
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
|
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st, \
|
||||||
LebesgueOrder &lo, \
|
LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
SiteHalfSpinor *buf, \
|
SiteHalfSpinor *buf, \
|
||||||
int sF,int sU, \
|
int sF,int sU, \
|
||||||
const FermionField &in, \
|
const FermionFieldView &in, \
|
||||||
FermionField &out){ assert(0); } \
|
FermionFieldView &out){ assert(0); } \
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
|
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st, \
|
||||||
LebesgueOrder &lo, \
|
LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
SiteHalfSpinor *buf, \
|
SiteHalfSpinor *buf, \
|
||||||
int sF,int sU, \
|
int sF,int sU, \
|
||||||
const FermionField &in, \
|
const FermionFieldView &in, \
|
||||||
FermionField &out){ assert(0); } \
|
FermionFieldView &out){ assert(0); } \
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
|
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st, \
|
||||||
LebesgueOrder &lo, \
|
LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
SiteHalfSpinor *buf, \
|
SiteHalfSpinor *buf, \
|
||||||
int sF,int sU, \
|
int sF,int sU, \
|
||||||
const FermionField &in, \
|
const FermionFieldView &in, \
|
||||||
FermionField &out){ assert(0); } \
|
FermionFieldView &out){ assert(0); } \
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
|
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st, \
|
||||||
LebesgueOrder &lo, \
|
LebesgueOrder &lo, \
|
||||||
DoubledGaugeField &U, \
|
DoubledGaugeFieldView &U, \
|
||||||
SiteHalfSpinor *buf, \
|
SiteHalfSpinor *buf, \
|
||||||
int sF,int sU, \
|
int sF,int sU, \
|
||||||
const FermionField &in, \
|
const FermionFieldView &in, \
|
||||||
FermionField &out){ assert(0); } \
|
FermionFieldView &out){ assert(0); } \
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
#define HAND_SPECIALISE_GPARITY(IMPL) \
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
WilsonKernels<IMPL>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||||
{ \
|
{ \
|
||||||
typedef IMPL Impl; \
|
typedef IMPL Impl; \
|
||||||
typedef typename Simd::scalar_type S; \
|
typedef typename Simd::scalar_type S; \
|
||||||
@ -812,8 +812,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
template<> \
|
template<> \
|
||||||
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
void WilsonKernels<IMPL>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||||
{ \
|
{ \
|
||||||
typedef IMPL Impl; \
|
typedef IMPL Impl; \
|
||||||
typedef typename Simd::scalar_type S; \
|
typedef typename Simd::scalar_type S; \
|
||||||
@ -828,8 +828,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
WilsonKernels<IMPL>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||||
{ \
|
{ \
|
||||||
typedef IMPL Impl; \
|
typedef IMPL Impl; \
|
||||||
typedef typename Simd::scalar_type S; \
|
typedef typename Simd::scalar_type S; \
|
||||||
@ -844,8 +844,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
template<> \
|
template<> \
|
||||||
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
void WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||||
{ \
|
{ \
|
||||||
typedef IMPL Impl; \
|
typedef IMPL Impl; \
|
||||||
typedef typename Simd::scalar_type S; \
|
typedef typename Simd::scalar_type S; \
|
||||||
@ -860,8 +860,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
template<> void \
|
template<> void \
|
||||||
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
WilsonKernels<IMPL>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||||
{ \
|
{ \
|
||||||
typedef IMPL Impl; \
|
typedef IMPL Impl; \
|
||||||
typedef typename Simd::scalar_type S; \
|
typedef typename Simd::scalar_type S; \
|
||||||
@ -877,8 +877,8 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D
|
|||||||
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
|
||||||
} \
|
} \
|
||||||
template<> \
|
template<> \
|
||||||
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
void WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out) \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
|
||||||
{ \
|
{ \
|
||||||
typedef IMPL Impl; \
|
typedef IMPL Impl; \
|
||||||
typedef typename Simd::scalar_type S; \
|
typedef typename Simd::scalar_type S; \
|
||||||
@ -904,18 +904,18 @@ HAND_SPECIALISE_GPARITY(GparityWilsonImplDF);
|
|||||||
////////////// Wilson ; uses this implementation /////////////////////
|
////////////// Wilson ; uses this implementation /////////////////////
|
||||||
|
|
||||||
#define INSTANTIATE_THEM(A) \
|
#define INSTANTIATE_THEM(A) \
|
||||||
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||||
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
template void WilsonKernels<A>::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);\
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);\
|
||||||
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||||
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
template void WilsonKernels<A>::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||||
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
template void WilsonKernels<A>::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,\
|
||||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out); \
|
||||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \
|
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionField &in, FermionField &out);
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
INSTANTIATE_THEM(WilsonImplF);
|
INSTANTIATE_THEM(WilsonImplF);
|
||||||
INSTANTIATE_THEM(WilsonImplD);
|
INSTANTIATE_THEM(WilsonImplD);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user