mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
GPU happy. Still need to prevent hand kernels being callable under NVCC
This commit is contained in:
parent
0c2498fe2f
commit
07ee87ff5a
@ -163,7 +163,7 @@ class FourierAcceleratedPV {
|
|||||||
for (int sidx=0;sidx<group_in_s;sidx++) {
|
for (int sidx=0;sidx<group_in_s;sidx++) {
|
||||||
|
|
||||||
int s = sgroup*group_in_s + sidx;
|
int s = sgroup*group_in_s + sidx;
|
||||||
int sprime = Ls-s-1;
|
// int sprime = Ls-s-1;
|
||||||
|
|
||||||
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
|
RealD phase = M_PI / (RealD)Ls * (2.0 * s + 1.0);
|
||||||
RealD cosp = ::cos(phase);
|
RealD cosp = ::cos(phase);
|
||||||
|
@ -259,7 +259,7 @@ void ImprovedStaggeredFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGauge
|
|||||||
auto B_v = B.View();
|
auto B_v = B.View();
|
||||||
auto Btilde_v = Btilde.View();
|
auto Btilde_v = Btilde.View();
|
||||||
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
|
thread_loop( (int sss = 0; sss < B.Grid()->oSites(); sss++), {
|
||||||
Kernels::DhopDir(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Force in three link terms
|
// Force in three link terms
|
||||||
@ -380,7 +380,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDir(const FermionField &in, FermionFiel
|
|||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
|
thread_loop( (int sss = 0; sss < in.Grid()->oSites(); sss++) , {
|
||||||
Kernels::DhopDir(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionFi
|
|||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
int sF = s+Ls*sU;
|
int sF = s+Ls*sU;
|
||||||
Kernels::DhopDir(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
|
Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sF, sU, in_v, out_v, dir, disp);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
@ -427,15 +427,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
auto out_v = out.View();
|
auto out_v = out.View();
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
|
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++), {
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
|
Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v);
|
||||||
}
|
});
|
||||||
} else {
|
} else {
|
||||||
parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
|
thread_loop( (int ss = 0; ss < U.Grid()->oSites(); ss++) ,{
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
|
Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v);
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
DhopComputeTime += usecond();
|
||||||
DhopTotalTime += usecond();
|
DhopTotalTime += usecond();
|
||||||
|
@ -170,7 +170,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &
|
|||||||
SiteSpinor *buf, int LLs, int sU,
|
SiteSpinor *buf, int LLs, int sU,
|
||||||
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
const FermionFieldView &in, FermionFieldView &out,int dag) {
|
||||||
const SiteSpinor *chi_p;
|
const SiteSpinor *chi_p;
|
||||||
SiteSpinor chi;
|
// SiteSpinor chi;
|
||||||
SiteSpinor Uchi;
|
SiteSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
@ -276,8 +276,8 @@ void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void StaggeredKernels<Impl>::DhopDir( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
|
void StaggeredKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF,
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
|
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp)
|
||||||
{
|
{
|
||||||
// Disp should be either +1,-1,+3,-3
|
// Disp should be either +1,-1,+3,-3
|
||||||
// What about "dag" ?
|
// What about "dag" ?
|
||||||
|
@ -49,8 +49,8 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void DhopDir(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Generic Nc kernels
|
// Generic Nc kernels
|
||||||
|
@ -963,10 +963,10 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
|||||||
SiteSpinor *buf, int LLs, \
|
SiteSpinor *buf, int LLs, \
|
||||||
int sU, const FermionFieldView &in, FermionFieldView &out,int dag);
|
int sU, const FermionFieldView &in, FermionFieldView &out,int dag);
|
||||||
|
|
||||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
|
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
|
||||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
|
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
|
||||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
|
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD);
|
||||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
|
//KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF);
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -247,7 +247,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
Simd U_22;
|
Simd U_22;
|
||||||
|
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
int offset,local,perm, ptype;
|
int offset, ptype, local, perm;
|
||||||
|
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int skew;
|
int skew;
|
||||||
@ -323,7 +323,7 @@ void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
Simd U_22;
|
Simd U_22;
|
||||||
|
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
int offset,local,perm, ptype;
|
int offset, ptype, local, perm;
|
||||||
|
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int skew;
|
int skew;
|
||||||
|
@ -109,8 +109,10 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
|||||||
for (int j = 0; j < Ns; j++)
|
for (int j = 0; j < Ns; j++)
|
||||||
for (int k = 0; k < Ns; k++)
|
for (int k = 0; k < Ns; k++)
|
||||||
for (int a = 0; a < DimRep; a++)
|
for (int a = 0; a < DimRep; a++)
|
||||||
for (int b = 0; b < DimRep; b++)
|
for (int b = 0; b < DimRep; b++){
|
||||||
EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b);
|
auto zz = Qx()(j, k)(a, b);
|
||||||
|
EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex<double>(zz);
|
||||||
|
}
|
||||||
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
|
// if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl;
|
||||||
|
|
||||||
EigenInvCloverOp = EigenCloverOp.inverse();
|
EigenInvCloverOp = EigenCloverOp.inverse();
|
||||||
|
@ -104,8 +104,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|||||||
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||||
|
|
||||||
for(int d=0;d<4;d++){
|
for(int d=0;d<4;d++){
|
||||||
assert(FourDimGrid._simd_layout[d]=1);
|
assert(FourDimGrid._simd_layout[d]==1);
|
||||||
assert(FourDimRedBlackGrid._simd_layout[d]=1);
|
assert(FourDimRedBlackGrid._simd_layout[d]==1);
|
||||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -528,14 +528,14 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
|||||||
auto U_v = U.View();
|
auto U_v = U.View();
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
Kernels::DhopDag(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
|
||||||
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
|
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
|
||||||
// int sU = ss;
|
// int sU = ss;
|
||||||
// int sF = LLs * sU;
|
// int sF = LLs * sU;
|
||||||
// Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
// Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||||
// }
|
// }
|
||||||
} else {
|
} else {
|
||||||
Kernels::Dhop(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U_v.size(),in,out);
|
||||||
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
|
// parallel_for (int ss = 0; ss < U.Grid()->oSites(); ss++) {
|
||||||
// int sU = ss;
|
// int sU = ss;
|
||||||
// int sF = LLs * sU;
|
// int sF = LLs * sU;
|
||||||
@ -672,7 +672,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
|||||||
for(int idx=0;idx<_grid->lSites();idx++){
|
for(int idx=0;idx<_grid->lSites();idx++){
|
||||||
Coordinate lcoor(Nd);
|
Coordinate lcoor(Nd);
|
||||||
Tcomplex cc;
|
Tcomplex cc;
|
||||||
RealD sgn;
|
// RealD sgn;
|
||||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
peekLocalSite(cc,cosha,lcoor);
|
peekLocalSite(cc,cosha,lcoor);
|
||||||
assert((double)real(cc)>=1.0);
|
assert((double)real(cc)>=1.0);
|
||||||
@ -867,7 +867,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
|||||||
for(int idx=0;idx<_grid->lSites();idx++){
|
for(int idx=0;idx<_grid->lSites();idx++){
|
||||||
Coordinate lcoor(Nd);
|
Coordinate lcoor(Nd);
|
||||||
Tcomplex cc;
|
Tcomplex cc;
|
||||||
RealD sgn;
|
// RealD sgn;
|
||||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
peekLocalSite(cc,cosha,lcoor);
|
peekLocalSite(cc,cosha,lcoor);
|
||||||
assert((double)real(cc)>=1.0);
|
assert((double)real(cc)>=1.0);
|
||||||
|
@ -52,12 +52,12 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
static void Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior=1,int exterior=1) ;
|
int interior=1,int exterior=1) ;
|
||||||
static void DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior=1,int exterior=1) ;
|
int interior=1,int exterior=1) ;
|
||||||
|
|
||||||
template <bool EnableBool = true> static accelerator
|
template <bool EnableBool = true> static accelerator
|
||||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||||
|
@ -137,9 +137,11 @@
|
|||||||
int nmax=U.oSites();
|
int nmax=U.oSites();
|
||||||
for(int site=0;site<Ns;site++) {
|
for(int site=0;site<Ns;site++) {
|
||||||
#ifndef EXTERIOR
|
#ifndef EXTERIOR
|
||||||
int sU =lo.Reorder(ssU);
|
// int sU =lo.Reorder(ssU);
|
||||||
|
int sU =ssU;
|
||||||
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
|
int ssn=ssU+1; if(ssn>=nmax) ssn=0;
|
||||||
int sUn=lo.Reorder(ssn);
|
// int sUn=lo.Reorder(ssn);
|
||||||
|
int sUn=ssn;
|
||||||
LOCK_GAUGE(0);
|
LOCK_GAUGE(0);
|
||||||
#else
|
#else
|
||||||
int sU =ssU;
|
int sU =ssU;
|
||||||
|
@ -303,9 +303,9 @@ GPU_EMPTY(GparityWilsonImplD);
|
|||||||
GPU_EMPTY(GparityWilsonImplDF);
|
GPU_EMPTY(GparityWilsonImplDF);
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior,int exterior)
|
int interior,int exterior)
|
||||||
{
|
{
|
||||||
auto U_v = U.View();
|
auto U_v = U.View();
|
||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
@ -319,7 +319,8 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
|
|||||||
// uint64_t lane = cur % nsimd;
|
// uint64_t lane = cur % nsimd;
|
||||||
cur = cur / nsimd;
|
cur = cur / nsimd;
|
||||||
uint64_t s = cur%Ls;
|
uint64_t s = cur%Ls;
|
||||||
uint64_t sF = cur; cur = cur / Ls;
|
// uint64_t sF = cur;
|
||||||
|
cur = cur / Ls;
|
||||||
uint64_t sU = cur;
|
uint64_t sU = cur;
|
||||||
WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);
|
WilsonKernels<Impl>::GpuDhopSite(st_v,U_v[sU],buf,Ls,s,sU,in_v,out_v);
|
||||||
});
|
});
|
||||||
@ -332,9 +333,9 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::DhopDag(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior,int exterior)
|
int interior,int exterior)
|
||||||
{
|
{
|
||||||
auto U_v = U.View();
|
auto U_v = U.View();
|
||||||
auto in_v = in.View();
|
auto in_v = in.View();
|
||||||
@ -349,7 +350,8 @@ void WilsonKernels<Impl>::Dhop(int Opt,StencilImpl &st, DoubledGaugeField &U, S
|
|||||||
// uint64_t lane = cur % nsimd;
|
// uint64_t lane = cur % nsimd;
|
||||||
cur = cur / nsimd;
|
cur = cur / nsimd;
|
||||||
uint64_t s = cur%Ls;
|
uint64_t s = cur%Ls;
|
||||||
uint64_t sF = cur; cur = cur / Ls;
|
//uint64_t sF = cur;
|
||||||
|
cur = cur / Ls;
|
||||||
uint64_t sU = cur;
|
uint64_t sU = cur;
|
||||||
WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v,buf,Ls,s,sU,in_v,out_v);
|
WilsonKernels<Impl>::GpuDhopSiteDag(st_v,U_v,buf,Ls,s,sU,in_v,out_v);
|
||||||
});
|
});
|
||||||
|
@ -453,6 +453,7 @@ template<class Impl> void
|
|||||||
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
|
#ifndef GRID_NVCC
|
||||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -471,12 +472,16 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
|
|||||||
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||||
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||||
HAND_RESULT(ss);
|
HAND_RESULT(ss);
|
||||||
|
#else
|
||||||
|
assert(0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
|
#ifndef GRID_NVCC
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
@ -494,12 +499,16 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
|
|||||||
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||||
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||||
HAND_RESULT(ss);
|
HAND_RESULT(ss);
|
||||||
|
#else
|
||||||
|
assert(0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
|
#ifndef GRID_NVCC
|
||||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -518,12 +527,16 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
|
|||||||
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||||
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||||
HAND_RESULT(ss);
|
HAND_RESULT(ss);
|
||||||
|
#else
|
||||||
|
assert(0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
|
#ifndef GRID_NVCC
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
@ -541,12 +554,16 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
|
|||||||
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||||
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||||
HAND_RESULT(ss);
|
HAND_RESULT(ss);
|
||||||
|
#else
|
||||||
|
assert(0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void
|
template<class Impl> void
|
||||||
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
|
#ifndef GRID_NVCC
|
||||||
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
@ -566,12 +583,16 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
|
|||||||
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(ZP_PROJ,1,Zm,ZP_RECON_ACCUM);
|
||||||
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM);
|
||||||
HAND_RESULT_EXT(ss);
|
HAND_RESULT_EXT(ss);
|
||||||
|
#else
|
||||||
|
assert(0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
|
||||||
{
|
{
|
||||||
|
#ifndef GRID_NVCC
|
||||||
typedef typename Simd::scalar_type S;
|
typedef typename Simd::scalar_type S;
|
||||||
typedef typename Simd::vector_type V;
|
typedef typename Simd::vector_type V;
|
||||||
|
|
||||||
@ -590,6 +611,9 @@ void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldVi
|
|||||||
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM);
|
||||||
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM);
|
||||||
HAND_RESULT_EXT(ss);
|
HAND_RESULT_EXT(ss);
|
||||||
|
#else
|
||||||
|
assert(0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////// Wilson ; uses this implementation /////////////////////
|
////////////// Wilson ; uses this implementation /////////////////////
|
||||||
|
@ -931,13 +931,13 @@ template void WilsonKernels<A>::HandDhopSiteExt(StencilView &st, DoubledGaugeFie
|
|||||||
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
template void WilsonKernels<A>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
|
||||||
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
|
int ss,int sU,const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
|
||||||
INSTANTIATE_THEM(GparityWilsonImplF);
|
//INSTANTIATE_THEM(GparityWilsonImplF);
|
||||||
INSTANTIATE_THEM(GparityWilsonImplD);
|
//INSTANTIATE_THEM(GparityWilsonImplD);
|
||||||
INSTANTIATE_THEM(GparityWilsonImplFH);
|
//INSTANTIATE_THEM(GparityWilsonImplFH);
|
||||||
INSTANTIATE_THEM(GparityWilsonImplDF);
|
//INSTANTIATE_THEM(GparityWilsonImplDF);
|
||||||
INSTANTIATE_THEM(DomainWallVec5dImplFH);
|
//INSTANTIATE_THEM(DomainWallVec5dImplFH);
|
||||||
INSTANTIATE_THEM(DomainWallVec5dImplDF);
|
//INSTANTIATE_THEM(DomainWallVec5dImplDF);
|
||||||
INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
|
//INSTANTIATE_THEM(ZDomainWallVec5dImplFH);
|
||||||
INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
|
//INSTANTIATE_THEM(ZDomainWallVec5dImplDF);
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user