mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-19 00:07:05 +01:00
Merge branch 'develop' of https://github.com/paboyle/Grid into feature/rare_kaon
# Conflicts: # lib/qcd/action/fermion/WilsonFermion5D.cc # tests/hadrons/Test_hadrons_rarekaon.cc
This commit is contained in:
@ -117,49 +117,20 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
|
||||
// Allocate the required comms buffer
|
||||
ImportGauge(_Umu);
|
||||
|
||||
// Build lists of exterior only nodes
|
||||
int LLs = FiveDimGrid._rdimensions[0];
|
||||
int vol4;
|
||||
vol4=FourDimGrid.oSites();
|
||||
Stencil.BuildSurfaceList(LLs,vol4);
|
||||
vol4=FourDimRedBlackGrid.oSites();
|
||||
StencilEven.BuildSurfaceList(LLs,vol4);
|
||||
StencilOdd.BuildSurfaceList(LLs,vol4);
|
||||
|
||||
std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size()
|
||||
<<" " << StencilEven.surface_list.size()<<std::endl;
|
||||
|
||||
}
|
||||
/*
|
||||
template<class Impl>
|
||||
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
RealD _M5,const ImplParams &p) :
|
||||
{
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==0); // Checkerboard the s-direction
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
Ls=FiveDimGrid._fdimensions[0];
|
||||
assert(FiveDimGrid._processors[0] ==1);
|
||||
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||
|
||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
|
||||
assert(FourDimGrid._simd_layout[d]=1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||
|
||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
{
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::Report(void)
|
||||
@ -396,6 +367,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DhopTotalTime+=usecond();
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
@ -409,12 +381,21 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
|
||||
int LLs = in._grid->_rdimensions[0];
|
||||
int len = U._grid->oSites();
|
||||
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.HaloExchangeOptGather(in,compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||
|
||||
// Rely on async comms; start comms before merge of local data
|
||||
DhopCommTime-=usecond();
|
||||
st.CommunicateBegin(reqs);
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
// Perhaps use omp task and region
|
||||
#pragma omp parallel
|
||||
{
|
||||
int nthreads = omp_get_num_threads();
|
||||
@ -425,8 +406,6 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
int sF = LLs * myoff;
|
||||
|
||||
if ( me == 0 ) {
|
||||
DhopCommTime-=usecond();
|
||||
st.CommunicateBegin(reqs);
|
||||
st.CommunicateComplete(reqs);
|
||||
DhopCommTime+=usecond();
|
||||
} else {
|
||||
@ -439,28 +418,37 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
}
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
int nthreads = omp_get_num_threads();
|
||||
int me = omp_get_thread_num();
|
||||
int myoff, mywork;
|
||||
|
||||
GridThread::GetWork(len,me,mywork,myoff,nthreads);
|
||||
int sF = LLs * myoff;
|
||||
|
||||
// Exterior links in stencil
|
||||
if ( me==0 ) DhopComputeTime2-=usecond();
|
||||
if (dag == DaggerYes) Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
|
||||
else Kernels::DhopSite (st,lo,U,st.CommBuf(),sF,myoff,LLs,mywork,in,out,0,1);
|
||||
if ( me==0 ) DhopComputeTime2+=usecond();
|
||||
}// end parallel region
|
||||
// Load imbalance alert. Should use dynamic schedule OMP for loop
|
||||
// Perhaps create a list of only those sites with face work, and
|
||||
// load balance process the list.
|
||||
DhopComputeTime2-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||
int sU = st.surface_list[ss];
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
||||
}
|
||||
} else {
|
||||
int sz=st.surface_list.size();
|
||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||
int sU = st.surface_list[ss];
|
||||
int sF = LLs * sU;
|
||||
Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1);
|
||||
}
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
|
Reference in New Issue
Block a user