1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 04:37:05 +01:00

Pushed the overlap comms tweaks

This commit is contained in:
paboyle
2016-01-11 06:34:22 -08:00
parent dafc74020c
commit fc6ad65751
6 changed files with 96 additions and 18 deletions

View File

@ -249,7 +249,9 @@ void WilsonFermion5D<Impl>::Report(void)
std::cout<<GridLogMessage << "Stencil gather "<<Stencil.gathertime<<" us"<<std::endl;
std::cout<<GridLogMessage << "Stencil gather simd "<<Stencil.gathermtime<<" us"<<std::endl;
std::cout<<GridLogMessage << "Stencil merge simd "<<Stencil.mergetime<<" us"<<std::endl;
std::cout<<GridLogMessage << "Stencil spin simd "<<Stencil.spintime<<" us"<<std::endl;
std::cout<<GridLogMessage << "********************"<<std::endl;
std::cout<<GridLogMessage << "Stencil MB/s "<<(double)Stencil.comms_bytes/Stencil.commtime<<std::endl;
std::cout<<GridLogMessage << "Stencil comm time "<<Stencil.commtime<<" us"<<std::endl;
std::cout<<GridLogMessage << "Stencil join time "<<Stencil.jointime<<" us"<<std::endl;
std::cout<<GridLogMessage << "********************"<<std::endl;
@ -425,6 +427,8 @@ void WilsonFermion5D<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st, Le
// assert((dag==DaggerNo) ||(dag==DaggerYes));
alltime-=usecond();
int calls;
int updates;
Compressor compressor(dag);
// Assume balanced KMP_AFFINITY; this is forced in GridThread.h

View File

@ -62,13 +62,13 @@ namespace Grid {
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
int DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
int DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
WilsonKernels(const ImplParams &p= ImplParams());

View File

@ -310,7 +310,7 @@ namespace QCD {
template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{
@ -385,6 +385,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeF
if ( SE->_permute ) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
@ -397,7 +398,6 @@ void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeF
num++;
}
// Yp
SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset;
@ -556,6 +556,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeF
vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5));
return 1;
} else if ( num ) {
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
@ -569,14 +570,16 @@ void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeF
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
return 1;
}
return 0;
}
template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
int WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{
@ -822,6 +825,7 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5));
return 1;
} else if ( num ) {
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
@ -835,7 +839,9 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
return 1;
}
return 0;
}
/*