1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Call the fast-path compressor for Wilson kernels to avoid an if/else on the projector

This commit is contained in:
Peter Boyle 2016-12-27 11:23:13 +00:00
parent 25efefc5b4
commit 3d21297bbb
3 changed files with 56 additions and 65 deletions

View File

@ -678,12 +678,9 @@ PARALLEL_FOR_LOOP
calls++; calls++;
Mergers.resize(0); Mergers.resize(0);
Packets.resize(0); Packets.resize(0);
_grid->StencilBarrier();
HaloGather(source,compress); HaloGather(source,compress);
this->CommunicateBegin(reqs); this->CommunicateBegin(reqs);
_grid->StencilBarrier();
this->CommunicateComplete(reqs); this->CommunicateComplete(reqs);
_grid->StencilBarrier();
CommsMerge(); // spins CommsMerge(); // spins
} }

View File

@ -171,6 +171,8 @@ namespace QCD {
class WilsonStencil : public CartesianStencil<vobj,cobj> { class WilsonStencil : public CartesianStencil<vobj,cobj> {
public: public:
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
WilsonStencil(GridBase *grid, WilsonStencil(GridBase *grid,
int npoints, int npoints,
int checkerboard, int checkerboard,
@ -178,79 +180,71 @@ namespace QCD {
const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances) const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
{ }; { };
template < class compressor>
std::thread HaloExchangeOptBegin(const Lattice<vobj> &source,compressor &compress) {
this->Mergers.resize(0);
this->Packets.resize(0);
this->HaloGatherOpt(source,compress);
return std::thread([&] { this->Communicate(); });
}
template < class compressor> template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress) void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
{ {
auto thr = this->HaloExchangeOptBegin(source,compress); std::vector<std::vector<CommsRequest_t> > reqs;
this->HaloExchangeOptComplete(thr); this->Mergers.resize(0);
this->Packets.resize(0);
this->HaloGatherOpt(source,compress);
this->CommunicateBegin(reqs);
this->CommunicateComplete(reqs);
this->CommsMerge(); // spins
this->calls++;
} }
void HaloExchangeOptComplete(std::thread &thr)
{
this->CommsMerge(); // spins
this->jointime-=usecond();
thr.join();
this->jointime+=usecond();
}
template < class compressor> template < class compressor>
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress) void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
{ {
// conformable(source._grid,_grid); int face_idx=0;
assert(source._grid==this->_grid);
this->halogtime-=usecond();
assert (this->comm_buf.size() == this->_unified_buffer_size ); // conformable(source._grid,_grid);
this->u_comm_offset=0; assert(source._grid==this->_grid);
this->halogtime-=usecond();
this->u_comm_offset=0;
int dag = compress.dag;
WilsonXpCompressor<cobj,vobj> XpCompress;
WilsonYpCompressor<cobj,vobj> YpCompress;
WilsonZpCompressor<cobj,vobj> ZpCompress;
WilsonTpCompressor<cobj,vobj> TpCompress;
WilsonXmCompressor<cobj,vobj> XmCompress;
WilsonYmCompressor<cobj,vobj> YmCompress;
WilsonZmCompressor<cobj,vobj> ZmCompress;
WilsonTmCompressor<cobj,vobj> TmCompress;
int dag = compress.dag; // Gather all comms buffers
static std::vector<int> dirs(Nd*2); // for(int point = 0 ; point < _npoints; point++) {
for(int mu=0;mu<Nd;mu++){ // compress.Point(point);
if ( dag ) { // HaloGatherDir(source,compress,point,face_idx);
dirs[mu] =mu; // }
dirs[mu+4]=mu+Nd; if ( dag ) {
} else { this->HaloGatherDir(source,XpCompress,Xp,face_idx);
dirs[mu] =mu+Nd; this->HaloGatherDir(source,YpCompress,Yp,face_idx);
dirs[mu+Nd]=mu; this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
} this->HaloGatherDir(source,TpCompress,Tp,face_idx);
} this->HaloGatherDir(source,XmCompress,Xm,face_idx);
this->HaloGatherDir(source,YmCompress,Ym,face_idx);
this->HaloGatherDir(source,ZmCompress,Zm,face_idx);
WilsonXpCompressor<cobj,vobj> XpCompress; this->HaloGatherDir(source,TmCompress,Tm,face_idx);
this->HaloGatherDir(source,XpCompress,dirs[0]); } else {
this->HaloGatherDir(source,XmCompress,Xp,face_idx);
WilsonYpCompressor<cobj,vobj> YpCompress; this->HaloGatherDir(source,YmCompress,Yp,face_idx);
this->HaloGatherDir(source,YpCompress,dirs[1]); this->HaloGatherDir(source,ZmCompress,Zp,face_idx);
this->HaloGatherDir(source,TmCompress,Tp,face_idx);
WilsonZpCompressor<cobj,vobj> ZpCompress; this->HaloGatherDir(source,XpCompress,Xm,face_idx);
this->HaloGatherDir(source,ZpCompress,dirs[2]); this->HaloGatherDir(source,YpCompress,Ym,face_idx);
this->HaloGatherDir(source,ZpCompress,Zm,face_idx);
WilsonTpCompressor<cobj,vobj> TpCompress; this->HaloGatherDir(source,TpCompress,Tm,face_idx);
this->HaloGatherDir(source,TpCompress,dirs[3]);
WilsonXmCompressor<cobj,vobj> XmCompress;
this->HaloGatherDir(source,XmCompress,dirs[4]);
WilsonYmCompressor<cobj,vobj> YmCompress;
this->HaloGatherDir(source,YmCompress,dirs[5]);
WilsonZmCompressor<cobj,vobj> ZmCompress;
this->HaloGatherDir(source,ZmCompress,dirs[6]);
WilsonTmCompressor<cobj,vobj> TmCompress;
this->HaloGatherDir(source,TmCompress,dirs[7]);
assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
} }
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
this->halogtime+=usecond();
}
}; };

View File

@ -403,7 +403,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
int LLs = in._grid->_rdimensions[0]; int LLs = in._grid->_rdimensions[0];
DhopCommTime-=usecond(); DhopCommTime-=usecond();
st.HaloExchange(in,compressor); st.HaloExchangeOpt(in,compressor);
DhopCommTime+=usecond(); DhopCommTime+=usecond();
DhopComputeTime-=usecond(); DhopComputeTime-=usecond();
@ -437,7 +437,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
stat.accum(nthreads); stat.accum(nthreads);
#endif #endif
} else { } else {
#if 1 #if 0
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) { for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU = ss; int sU = ss;