mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 10:09:34 +01:00 
			
		
		
		
	Try a better load balancing loop
This commit is contained in:
		| @@ -406,6 +406,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | |||||||
|   // Load imbalance alert: this should use a dynamically scheduled OMP for loop. |   // Load imbalance alert: this should use a dynamically scheduled OMP for loop. | ||||||
|   // Perhaps create a list of only those sites with face work, and  |   // Perhaps create a list of only those sites with face work, and  | ||||||
|   // process that list with load balancing. |   // process that list with load balancing. | ||||||
|  | #if 1 | ||||||
| #pragma omp parallel  | #pragma omp parallel  | ||||||
|   { |   { | ||||||
|     int nthreads = omp_get_num_threads(); |     int nthreads = omp_get_num_threads(); | ||||||
| @@ -421,9 +422,28 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg | |||||||
|     else                  Kernels::DhopSite   (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1); |     else                  Kernels::DhopSite   (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1); | ||||||
|     if ( me==0 ) DhopComputeTime2+=usecond(); |     if ( me==0 ) DhopComputeTime2+=usecond(); | ||||||
|   }// end parallel region |   }// end parallel region | ||||||
|  | #else  | ||||||
|  | DhopComputeTime2-=usecond(); | ||||||
|  |   if (dag == DaggerYes) { | ||||||
|  |     parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { | ||||||
|  |       int sU = ss; | ||||||
|  |       int sF = LLs * sU; | ||||||
|  |       Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { | ||||||
|  |       int sU = ss; | ||||||
|  |       int sF = LLs * sU; | ||||||
|  |       Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | DhopComputeTime2+=usecond(); | ||||||
|  | #endif | ||||||
|  |  | ||||||
| #else  | #else  | ||||||
|   assert(0); |   assert(0); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| } | } | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user