mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Call the fast-path compressor for Wilson kernels to avoid an if/else on the projector
This commit is contained in:
parent
25efefc5b4
commit
3d21297bbb
@ -678,12 +678,9 @@ PARALLEL_FOR_LOOP
|
|||||||
calls++;
|
calls++;
|
||||||
Mergers.resize(0);
|
Mergers.resize(0);
|
||||||
Packets.resize(0);
|
Packets.resize(0);
|
||||||
_grid->StencilBarrier();
|
|
||||||
HaloGather(source,compress);
|
HaloGather(source,compress);
|
||||||
this->CommunicateBegin(reqs);
|
this->CommunicateBegin(reqs);
|
||||||
_grid->StencilBarrier();
|
|
||||||
this->CommunicateComplete(reqs);
|
this->CommunicateComplete(reqs);
|
||||||
_grid->StencilBarrier();
|
|
||||||
CommsMerge(); // spins
|
CommsMerge(); // spins
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,6 +171,8 @@ namespace QCD {
|
|||||||
class WilsonStencil : public CartesianStencil<vobj,cobj> {
|
class WilsonStencil : public CartesianStencil<vobj,cobj> {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
typedef CartesianCommunicator::CommsRequest_t CommsRequest_t;
|
||||||
|
|
||||||
WilsonStencil(GridBase *grid,
|
WilsonStencil(GridBase *grid,
|
||||||
int npoints,
|
int npoints,
|
||||||
int checkerboard,
|
int checkerboard,
|
||||||
@ -178,79 +180,71 @@ namespace QCD {
|
|||||||
const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
||||||
{ };
|
{ };
|
||||||
|
|
||||||
template < class compressor>
|
|
||||||
std::thread HaloExchangeOptBegin(const Lattice<vobj> &source,compressor &compress) {
|
|
||||||
this->Mergers.resize(0);
|
|
||||||
this->Packets.resize(0);
|
|
||||||
this->HaloGatherOpt(source,compress);
|
|
||||||
return std::thread([&] { this->Communicate(); });
|
|
||||||
}
|
|
||||||
|
|
||||||
template < class compressor>
|
template < class compressor>
|
||||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||||
{
|
{
|
||||||
auto thr = this->HaloExchangeOptBegin(source,compress);
|
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||||
this->HaloExchangeOptComplete(thr);
|
this->Mergers.resize(0);
|
||||||
|
this->Packets.resize(0);
|
||||||
|
this->HaloGatherOpt(source,compress);
|
||||||
|
this->CommunicateBegin(reqs);
|
||||||
|
this->CommunicateComplete(reqs);
|
||||||
|
this->CommsMerge(); // spins
|
||||||
|
this->calls++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HaloExchangeOptComplete(std::thread &thr)
|
|
||||||
{
|
|
||||||
this->CommsMerge(); // spins
|
|
||||||
this->jointime-=usecond();
|
|
||||||
thr.join();
|
|
||||||
this->jointime+=usecond();
|
|
||||||
}
|
|
||||||
|
|
||||||
template < class compressor>
|
template < class compressor>
|
||||||
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
|
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
|
||||||
{
|
{
|
||||||
// conformable(source._grid,_grid);
|
int face_idx=0;
|
||||||
assert(source._grid==this->_grid);
|
|
||||||
this->halogtime-=usecond();
|
|
||||||
|
|
||||||
assert (this->comm_buf.size() == this->_unified_buffer_size );
|
// conformable(source._grid,_grid);
|
||||||
this->u_comm_offset=0;
|
assert(source._grid==this->_grid);
|
||||||
|
this->halogtime-=usecond();
|
||||||
|
|
||||||
|
this->u_comm_offset=0;
|
||||||
|
|
||||||
|
int dag = compress.dag;
|
||||||
|
|
||||||
|
WilsonXpCompressor<cobj,vobj> XpCompress;
|
||||||
|
WilsonYpCompressor<cobj,vobj> YpCompress;
|
||||||
|
WilsonZpCompressor<cobj,vobj> ZpCompress;
|
||||||
|
WilsonTpCompressor<cobj,vobj> TpCompress;
|
||||||
|
WilsonXmCompressor<cobj,vobj> XmCompress;
|
||||||
|
WilsonYmCompressor<cobj,vobj> YmCompress;
|
||||||
|
WilsonZmCompressor<cobj,vobj> ZmCompress;
|
||||||
|
WilsonTmCompressor<cobj,vobj> TmCompress;
|
||||||
|
|
||||||
int dag = compress.dag;
|
// Gather all comms buffers
|
||||||
static std::vector<int> dirs(Nd*2);
|
// for(int point = 0 ; point < _npoints; point++) {
|
||||||
for(int mu=0;mu<Nd;mu++){
|
// compress.Point(point);
|
||||||
if ( dag ) {
|
// HaloGatherDir(source,compress,point,face_idx);
|
||||||
dirs[mu] =mu;
|
// }
|
||||||
dirs[mu+4]=mu+Nd;
|
if ( dag ) {
|
||||||
} else {
|
this->HaloGatherDir(source,XpCompress,Xp,face_idx);
|
||||||
dirs[mu] =mu+Nd;
|
this->HaloGatherDir(source,YpCompress,Yp,face_idx);
|
||||||
dirs[mu+Nd]=mu;
|
this->HaloGatherDir(source,ZpCompress,Zp,face_idx);
|
||||||
}
|
this->HaloGatherDir(source,TpCompress,Tp,face_idx);
|
||||||
}
|
this->HaloGatherDir(source,XmCompress,Xm,face_idx);
|
||||||
|
this->HaloGatherDir(source,YmCompress,Ym,face_idx);
|
||||||
|
this->HaloGatherDir(source,ZmCompress,Zm,face_idx);
|
||||||
WilsonXpCompressor<cobj,vobj> XpCompress;
|
this->HaloGatherDir(source,TmCompress,Tm,face_idx);
|
||||||
this->HaloGatherDir(source,XpCompress,dirs[0]);
|
} else {
|
||||||
|
this->HaloGatherDir(source,XmCompress,Xp,face_idx);
|
||||||
WilsonYpCompressor<cobj,vobj> YpCompress;
|
this->HaloGatherDir(source,YmCompress,Yp,face_idx);
|
||||||
this->HaloGatherDir(source,YpCompress,dirs[1]);
|
this->HaloGatherDir(source,ZmCompress,Zp,face_idx);
|
||||||
|
this->HaloGatherDir(source,TmCompress,Tp,face_idx);
|
||||||
WilsonZpCompressor<cobj,vobj> ZpCompress;
|
this->HaloGatherDir(source,XpCompress,Xm,face_idx);
|
||||||
this->HaloGatherDir(source,ZpCompress,dirs[2]);
|
this->HaloGatherDir(source,YpCompress,Ym,face_idx);
|
||||||
|
this->HaloGatherDir(source,ZpCompress,Zm,face_idx);
|
||||||
WilsonTpCompressor<cobj,vobj> TpCompress;
|
this->HaloGatherDir(source,TpCompress,Tm,face_idx);
|
||||||
this->HaloGatherDir(source,TpCompress,dirs[3]);
|
|
||||||
|
|
||||||
WilsonXmCompressor<cobj,vobj> XmCompress;
|
|
||||||
this->HaloGatherDir(source,XmCompress,dirs[4]);
|
|
||||||
|
|
||||||
WilsonYmCompressor<cobj,vobj> YmCompress;
|
|
||||||
this->HaloGatherDir(source,YmCompress,dirs[5]);
|
|
||||||
|
|
||||||
WilsonZmCompressor<cobj,vobj> ZmCompress;
|
|
||||||
this->HaloGatherDir(source,ZmCompress,dirs[6]);
|
|
||||||
|
|
||||||
WilsonTmCompressor<cobj,vobj> TmCompress;
|
|
||||||
this->HaloGatherDir(source,TmCompress,dirs[7]);
|
|
||||||
|
|
||||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
|
||||||
this->halogtime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
this->face_table_computed=1;
|
||||||
|
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||||
|
this->halogtime+=usecond();
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -403,7 +403,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|||||||
int LLs = in._grid->_rdimensions[0];
|
int LLs = in._grid->_rdimensions[0];
|
||||||
|
|
||||||
DhopCommTime-=usecond();
|
DhopCommTime-=usecond();
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchangeOpt(in,compressor);
|
||||||
DhopCommTime+=usecond();
|
DhopCommTime+=usecond();
|
||||||
|
|
||||||
DhopComputeTime-=usecond();
|
DhopComputeTime-=usecond();
|
||||||
@ -437,7 +437,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|||||||
stat.accum(nthreads);
|
stat.accum(nthreads);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
#if 1
|
#if 0
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU = ss;
|
int sU = ss;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user