mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Improvements to the assembler interface that let us move chunks of the
site and s loop into the kernels. This will save on function call overhead and guarantee L2 prefetching strategy is right since OMP can't distribute the sub-chunks of work.
This commit is contained in:
		@@ -40,23 +40,42 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
 | 
			
		||||
template<class Impl> 
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
 | 
			
		||||
						  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
						  int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
						  int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  if ( AsmOpt )     WilsonKernels<Impl>::DiracOptAsmDhopSite(st,U,buf,sF,sU,in,out);
 | 
			
		||||
  else if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSite(st,U,buf,sF,sU,in,out);
 | 
			
		||||
  else              WilsonKernels<Impl>::DiracOptGenericDhopSite(st,U,buf,sF,sU,in,out);
 | 
			
		||||
  if ( AsmOpt ) {
 | 
			
		||||
 | 
			
		||||
    WilsonKernels<Impl>::DiracOptAsmDhopSite(st,U,buf,sF,sU,Ls,Ns,in,out);
 | 
			
		||||
 | 
			
		||||
  } else {
 | 
			
		||||
 | 
			
		||||
    for(int site=0;site<Ns;site++) {
 | 
			
		||||
      for(int s=0;s<Ls;s++) {
 | 
			
		||||
	if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSite(st,U,buf,sF,sU,in,out);
 | 
			
		||||
	else         WilsonKernels<Impl>::DiracOptGenericDhopSite(st,U,buf,sF,sU,in,out);
 | 
			
		||||
	sF++;
 | 
			
		||||
      }
 | 
			
		||||
      sU++;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl> 
 | 
			
		||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
 | 
			
		||||
					   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
			
		||||
					   int sF,int sU,const FermionField &in, FermionField &out)
 | 
			
		||||
					   int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
 | 
			
		||||
{
 | 
			
		||||
  // No asm implementation yet.
 | 
			
		||||
  //  if ( AsmOpt )     WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,U,buf,sF,sU,in,out);
 | 
			
		||||
  //  else
 | 
			
		||||
  if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,U,buf,sF,sU,in,out);
 | 
			
		||||
  else         WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,U,buf,sF,sU,in,out);
 | 
			
		||||
  for(int site=0;site<Ns;site++) {
 | 
			
		||||
    for(int s=0;s<Ls;s++) {
 | 
			
		||||
      if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,U,buf,sF,sU,in,out);
 | 
			
		||||
      else         WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,U,buf,sF,sU,in,out);
 | 
			
		||||
      sF++;
 | 
			
		||||
    }
 | 
			
		||||
    sU++;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user