mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Overlapped Comm for Wilson DhopInternal
This commit is contained in:
parent
0f468e2179
commit
b938202081
@ -322,15 +322,98 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
/*Change starts*/
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerial(st,lo,U,in,out,dag);
|
||||
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
#ifdef GRID_OMP
|
||||
Compressor compressor;
|
||||
int len = U._grid->oSites();
|
||||
const int LLs = 1;
|
||||
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
st.CommsMergeSHM(compressor);
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int ncomms = CartesianCommunicator::nCommThreads;
|
||||
if (ncomms == -1) ncomms = 1;
|
||||
assert(nthreads > ncomms);
|
||||
if (tid >= ncomms) {
|
||||
nthreads -= ncomms;
|
||||
int ttid = tid - ncomms;
|
||||
int n = len;
|
||||
int chunk = n / nthreads;
|
||||
int rem = n % nthreads;
|
||||
int myblock, myn;
|
||||
if (ttid < rem) {
|
||||
myblock = ttid * chunk + ttid;
|
||||
myn = chunk+1;
|
||||
} else {
|
||||
myblock = ttid*chunk + rem;
|
||||
myn = chunk;
|
||||
}
|
||||
// do the compute
|
||||
if (dag == DaggerYes) {
|
||||
|
||||
for (int sss = myblock; sss < myblock+myn; ++sss) {
|
||||
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} else {
|
||||
for (int sss = myblock; sss < myblock+myn; ++sss) {
|
||||
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} //else
|
||||
|
||||
} else {
|
||||
st.CommunicateThreaded();
|
||||
}
|
||||
|
||||
Compressor compressor(dag);
|
||||
|
||||
if (dag == DaggerYes) {
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} else {
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
}
|
||||
|
||||
} //pragma
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
Compressor compressor(dag);
|
||||
st.HaloExchange(in, compressor);
|
||||
|
||||
@ -344,6 +427,7 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
}
|
||||
}
|
||||
};
|
||||
/*Change ends */
|
||||
|
||||
FermOpTemplateInstantiate(WilsonFermion);
|
||||
AdjointFermOpTemplateInstantiate(WilsonFermion);
|
||||
|
@ -115,6 +115,12 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
||||
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
// Constructor
|
||||
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
|
@ -53,7 +53,7 @@ template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
@ -70,27 +70,27 @@ public:
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@ -200,6 +200,7 @@ private:
|
||||
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user