mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Accelerator related
This commit is contained in:
parent
cfeda9d536
commit
e4df025d01
@ -53,23 +53,23 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
template <bool EnableBool = true>
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||||
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
||||||
{
|
{
|
||||||
bgq_l1p_optimisation(1);
|
bgq_l1p_optimisation(1);
|
||||||
switch(Opt) {
|
switch(Opt) {
|
||||||
#if defined(AVX512) || defined (QPX)
|
#if defined(AVX512) || defined (QPX)
|
||||||
case OptInlineAsm:
|
case OptInlineAsm:
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
else if (interior) WilsonKernels<Impl>::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
|
||||||
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
|
||||||
else assert(0);
|
else assert(0);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case OptHandUnroll:
|
case OptHandUnroll:
|
||||||
for (int site = 0; site < Ns; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||||
@ -80,7 +80,7 @@ public:
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case OptGeneric:
|
case OptGeneric:
|
||||||
for (int site = 0; site < Ns; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||||
@ -97,12 +97,12 @@ public:
|
|||||||
bgq_l1p_optimisation(0);
|
bgq_l1p_optimisation(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool EnableBool = true>
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||||
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) {
|
||||||
// no kernel choice
|
// no kernel choice
|
||||||
for (int site = 0; site < Ns; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||||
@ -114,23 +114,23 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool EnableBool = true>
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
||||||
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1)
|
||||||
{
|
{
|
||||||
bgq_l1p_optimisation(1);
|
bgq_l1p_optimisation(1);
|
||||||
switch(Opt) {
|
switch(Opt) {
|
||||||
#if defined(AVX512) || defined (QPX)
|
#if defined(AVX512) || defined (QPX)
|
||||||
case OptInlineAsm:
|
case OptInlineAsm:
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
else if (interior) WilsonKernels<Impl>::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
|
||||||
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
else if (exterior) WilsonKernels<Impl>::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Nsite,in,out);
|
||||||
else assert(0);
|
else assert(0);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case OptHandUnroll:
|
case OptHandUnroll:
|
||||||
for (int site = 0; site < Ns; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
else if (interior) WilsonKernels<Impl>::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
||||||
@ -142,7 +142,7 @@ public:
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case OptGeneric:
|
case OptGeneric:
|
||||||
for (int site = 0; site < Ns; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
||||||
@ -159,12 +159,12 @@ public:
|
|||||||
bgq_l1p_optimisation(0);
|
bgq_l1p_optimisation(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool EnableBool = true>
|
template <bool EnableBool = true> accelerator
|
||||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
||||||
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out,int interior=1,int exterior=1) {
|
||||||
|
|
||||||
for (int site = 0; site < Ns; site++) {
|
for (int site = 0; site < Nsite; site++) {
|
||||||
for (int s = 0; s < Ls; s++) {
|
for (int s = 0; s < Ls; s++) {
|
||||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
else if (interior) WilsonKernels<Impl>::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out);
|
||||||
@ -176,8 +176,8 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
acceleratorvoid DhopDirK(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Utilities for inserting Wilson conserved current.
|
// Utilities for inserting Wilson conserved current.
|
||||||
@ -213,63 +213,63 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Specialised variants
|
// Specialised variants
|
||||||
void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
|
||||||
|
|
||||||
void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
|
||||||
|
|
||||||
void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
|
||||||
|
|
||||||
void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
accelerator void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionField &in, FermionField &out);
|
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
||||||
|
|
||||||
|
accelerator void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
||||||
|
|
||||||
|
accelerator void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Nsite, const FermionField &in,FermionField &out);
|
||||||
|
|
||||||
|
accelerator void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Nsite, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
|
||||||
|
accelerator void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
accelerator void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
WilsonKernels(const ImplParams &p = ImplParams());
|
WilsonKernels(const ImplParams &p = ImplParams());
|
||||||
|
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user