mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 15:57:05 +01:00
Improvements to the assembler interface that let us move chunks of the
site and s loop into the kernels. This will save on function call overhead and guarantee L2 prefetching strategy is right since OMP can't distribute the sub-chunks of work.
This commit is contained in:
@ -41,7 +41,7 @@ namespace QCD {
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
@ -72,8 +72,8 @@ static int signInit = setupSigns();
|
||||
|
||||
template<>
|
||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#undef VMOVIDUP
|
||||
@ -87,29 +87,29 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaug
|
||||
template<>
|
||||
void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#endif
|
||||
|
||||
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
|
||||
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
}}
|
||||
|
||||
|
Reference in New Issue
Block a user