1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 15:55:37 +00:00

Try a better load balancing loop

This commit is contained in:
paboyle 2017-04-22 19:27:41 +01:00
parent abba44a837
commit b722889234

View File

@ -406,6 +406,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
// Load imbalance alert: this should use a dynamically scheduled OMP for loop.
// Alternatively, create a list of only those sites that have face work, and
// process that list in a load-balanced way.
#if 1
#pragma omp parallel #pragma omp parallel
{ {
int nthreads = omp_get_num_threads(); int nthreads = omp_get_num_threads();
@ -421,9 +422,28 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1); else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
if ( me==0 ) DhopComputeTime2+=usecond(); if ( me==0 ) DhopComputeTime2+=usecond();
}// end parallel region }// end parallel region
#else
DhopComputeTime2-=usecond();
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
} else {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
}
DhopComputeTime2+=usecond();
#endif
#else #else
assert(0); assert(0);
#endif #endif
} }
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,