mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-16 23:07:05 +01:00
Merge branch 'master' of https://github.com/paboyle/Grid
This commit is contained in:
@ -266,11 +266,8 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
if( this->HandOptDslash ) {
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
if ( LebesgueOrder::UseLebesgueOrder ) {
|
||||
sU=lo.Reorder(ss);
|
||||
}
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,comm_buf,sF,sU,in,out);
|
||||
}
|
||||
@ -323,52 +320,42 @@ PARALLEL_FOR_LOOP
|
||||
// Counter.Report();
|
||||
// }
|
||||
} else if( this->HandOptDslash ) {
|
||||
/*
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int t=0;t<threads;t++){
|
||||
|
||||
int hyperthread = t%HT;
|
||||
int core = t/HT;
|
||||
|
||||
int sswork, swork,soff, sU,sF;
|
||||
|
||||
sswork = (nwork + cores-1)/cores;
|
||||
int sswork, swork,soff,ssoff, sU,sF;
|
||||
|
||||
GridThread::GetWork(nwork,core,sswork,ssoff,cores);
|
||||
GridThread::GetWork(Ls , hyperthread, swork, soff,HT);
|
||||
|
||||
for(int ss=0;ss<sswork;ss++){
|
||||
sU=ss+core*sswork; // max locality within an L2 slice
|
||||
if ( LebesgueOrder::UseLebesgueOrder ) {
|
||||
sU = lo.Reorder(sU);
|
||||
sU=ss+ ssoff;
|
||||
for(int s=soff;s<soff+swork;s++){
|
||||
sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
|
||||
}
|
||||
if ( sU < nwork ) {
|
||||
for(int s=soff;s<soff+swork;s++){
|
||||
sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
if ( LebesgueOrder::UseLebesgueOrder ) {
|
||||
sU=lo.Reorder(ss);
|
||||
}
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
// int sU=lo.Reorder(ss);
|
||||
int sU=ss;
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptDhopSite(st,U,comm_buf,sF,sU,in,out);
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ namespace Grid {
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
||||
#if defined(AVX512) || defined(IMCI)
|
||||
void DiracOptAsmDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||
void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,uint64_t *);
|
||||
#else
|
||||
|
Reference in New Issue
Block a user