1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-10 06:00:45 +01:00

Clean implementation of the exterior faces listing only those points on the boundary

This commit is contained in:
Peter Boyle 2017-04-26 02:34:52 -04:00
parent 2ce898efa3
commit fd1eb7de13

View File

@ -367,6 +367,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DhopTotalTime+=usecond(); DhopTotalTime+=usecond();
} }
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,
@ -380,7 +381,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
int LLs = in._grid->_rdimensions[0]; int LLs = in._grid->_rdimensions[0];
int len = U._grid->oSites(); int len = U._grid->oSites();
DhopFaceTime-=usecond(); DhopFaceTime-=usecond();
st.HaloExchangeOptGather(in,compressor); st.HaloExchangeOptGather(in,compressor);
DhopFaceTime+=usecond(); DhopFaceTime+=usecond();
@ -390,6 +391,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
st.CommunicateBegin(reqs); st.CommunicateBegin(reqs);
st.CommsMergeSHM(compressor); st.CommsMergeSHM(compressor);
// Perhaps use omp task and region
#pragma omp parallel #pragma omp parallel
{ {
int nthreads = omp_get_num_threads(); int nthreads = omp_get_num_threads();
@ -419,70 +421,31 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
// Load imbalance alert. Should use dynamic schedule OMP for loop // Load imbalance alert. Should use dynamic schedule OMP for loop
// Perhaps create a list of only those sites with face work, and // Perhaps create a list of only those sites with face work, and
// load balance process the list. // load balance process the list.
#if 1
#if 0
#pragma omp parallel
{
int nthreads = omp_get_num_threads();
int me = omp_get_thread_num();
int myoff, mywork;
GridThread::GetWork(len,me,mywork,myoff,nthreads);
int sF = LLs * myoff;
// Exterior links in stencil
if ( me==0 ) DhopComputeTime2-=usecond();
if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
if ( me==0 ) DhopComputeTime2+=usecond();
}// end parallel region
#else
DhopComputeTime2-=usecond(); DhopComputeTime2-=usecond();
if (dag == DaggerYes) { if (dag == DaggerYes) {
#pragma omp parallel for schedule(static,1) int sz=st.surface_list.size();
for (int ss = 0; ss < st.surface_list.size(); ss++) { parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss]; int sU = st.surface_list[ss];
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
} }
} else { } else {
#pragma omp parallel for schedule(static,1) int sz=st.surface_list.size();
for (int ss = 0; ss < st.surface_list.size(); ss++) { parallel_for (int ss = 0; ss < sz; ss++) {
int sU = st.surface_list[ss]; int sU = st.surface_list[ss];
int sF = LLs * sU; int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
} }
} }
DhopComputeTime2+=usecond(); DhopComputeTime2+=usecond();
#endif
#else
DhopComputeTime2-=usecond();
if (dag == DaggerYes) {
#pragma omp parallel for schedule(static,4)
for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
} else {
#pragma omp parallel for schedule(static,1)
for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss;
int sF = LLs * sU;
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
}
}
DhopComputeTime2+=usecond();
#endif
#else #else
assert(0); assert(0);
#endif #endif
} }
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,