Mirror of https://github.com/paboyle/Grid.git (synced 2025-04-09 21:50:45 +01:00)
Move barrier into the StencilSend begin routine
parent 74f10c2dc0
commit 75bb6b2b40
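In short, the device-synchronisation barrier that previously sat at the end of the halo gather now runs at the start of CommunicateBegin, immediately before StencilSendToRecvFromBegin posts the transfers. Below is a self-contained toy sketch, not Grid code, of the ordering constraint this enforces; std::async stands in for accelerator packing kernels and MPI, and it assumes accelerator_barrier() waits for outstanding device work so the packed send buffers are complete before they are handed to the communication layer.

// Toy illustration only (std::async in place of device kernels and MPI):
// packing runs asynchronously, and the "begin" routine must synchronise
// before it reads the send buffer, otherwise it could post stale data.
#include <cstddef>
#include <cstdio>
#include <future>
#include <vector>

static std::future<void> pending_pack;            // stands in for in-flight packing kernels

void gather(std::vector<int> &send_buf)           // asynchronous "halo gather"
{
  pending_pack = std::async(std::launch::async, [&send_buf] {
    for (std::size_t i = 0; i < send_buf.size(); i++) send_buf[i] = int(i);
  });
}

void communicate_begin(const std::vector<int> &send_buf)
{
  pending_pack.wait();                            // the moved barrier: packing must be done
  std::printf("posting send of %d..%d\n", send_buf.front(), send_buf.back());
}

int main()
{
  std::vector<int> send_buf(8);
  gather(send_buf);              // returns immediately, packing still in flight
  communicate_begin(send_buf);   // safe: waits before the buffer is handed to "MPI"
  return 0;
}

One plausible reading of the move: keeping the wait out of the gather itself leaves the gather non-blocking, and the synchronisation happens only where it is actually needed, just before the sends are posted.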
@@ -359,6 +359,7 @@ public:
   ////////////////////////////////////////////////////////////////////////
   void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
   {
+    accelerator_barrier();
     for(int i=0;i<Packets.size();i++){
       _grid->StencilSendToRecvFromBegin(MpiReqs,
                                         Packets[i].send_buf,
@@ -371,39 +372,19 @@ public:
 
   void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
   {
-    _grid->StencilSendToRecvFromComplete(MpiReqs,i);
+    _grid->StencilSendToRecvFromComplete(MpiReqs,0);
   }
   ////////////////////////////////////////////////////////////////////////
   // Blocking send and receive. Either sequential or parallel.
   ////////////////////////////////////////////////////////////////////////
   void Communicate(void)
   {
-    if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){
-      /////////////////////////////////////////////////////////
-      // several way threaded on different communicators.
-      // Cannot combine with Dirichlet operators
-      // This scheme is needed on Intel Omnipath for best performance
-      // Deprecate once there are very few omnipath clusters
-      /////////////////////////////////////////////////////////
-      int nthreads = CartesianCommunicator::nCommThreads;
-      int old = GridThread::GetThreads();
-      GridThread::SetThreads(nthreads);
-      thread_for(i,Packets.size(),{
-        _grid->StencilSendToRecvFrom(Packets[i].send_buf,
-                                     Packets[i].to_rank,Packets[i].do_send,
-                                     Packets[i].recv_buf,
-                                     Packets[i].from_rank,Packets[i].do_recv,
-                                     Packets[i].bytes,i);
-      });
-      GridThread::SetThreads(old);
-    } else {
-      /////////////////////////////////////////////////////////
-      // Concurrent and non-threaded asynch calls to MPI
-      /////////////////////////////////////////////////////////
-      std::vector<std::vector<CommsRequest_t> > reqs;
-      this->CommunicateBegin(reqs);
-      this->CommunicateComplete(reqs);
-    }
+    /////////////////////////////////////////////////////////
+    // Concurrent and non-threaded asynch calls to MPI
+    /////////////////////////////////////////////////////////
+    std::vector<std::vector<CommsRequest_t> > reqs;
+    this->CommunicateBegin(reqs);
+    this->CommunicateComplete(reqs);
   }
 
   template<class compressor> void HaloExchange(const Lattice<vobj> &source,compressor &compress)
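After this hunk, Communicate() is simply the blocking composition of the split CommunicateBegin/CommunicateComplete pair; the split itself exists so that callers can overlap other work with the in-flight transfers. A minimal sketch of that pattern follows, using std::future as a stand-in for CommsRequest_t / MPI requests; the names and structure are illustrative, not Grid's API.

#include <cstdio>
#include <future>
#include <vector>

struct Request { std::future<int> done; };        // stand-in for a comms request

void communicate_begin(std::vector<Request> &reqs)
{
  // "post" a non-blocking transfer and record its request handle
  reqs.push_back(Request{ std::async(std::launch::async, [] { return 42; }) });
}

void communicate_complete(std::vector<Request> &reqs)
{
  for (auto &r : reqs) std::printf("transfer done: %d\n", r.done.get());  // wait on all
  reqs.clear();
}

void communicate()                                // blocking convenience, as in Communicate()
{
  std::vector<Request> reqs;
  communicate_begin(reqs);
  // a caller wanting overlap would do interior (non-halo) compute here
  communicate_complete(reqs);
}

int main() { communicate(); return 0; }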
@@ -483,7 +464,6 @@ public:
     face_table_computed=1;
     assert(u_comm_offset==_unified_buffer_size);
 
-    accelerator_barrier();
   }
 
   /////////////////////////