mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Try a better load balancing loop
This commit is contained in:
parent
abba44a837
commit
b722889234
@ -406,6 +406,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
// Load imbalance alert: this should use an OpenMP for loop with a dynamic schedule.
|
||||
// Perhaps create a list of only those sites with face work, and
|
||||
// process that list in a load-balanced way.
|
||||
#if 1
|
||||
#pragma omp parallel
|
||||
{
|
||||
int nthreads = omp_get_num_threads();
|
||||
@ -421,9 +422,28 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
|
||||
if ( me==0 ) DhopComputeTime2+=usecond();
|
||||
}// end parallel region
|
||||
#else
|
||||
DhopComputeTime2-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
||||
}
|
||||
} else {
|
||||
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
||||
}
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
#endif
|
||||
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
|
||||
}
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
|
||||
|
Loading…
Reference in New Issue
Block a user