MPI_THREAD_SINGLE hack for Fugaku, enabled by -DTOFU

Author: nmeyer-ur
Date:   2020-05-11 13:21:39 +02:00
commit: ffaaed679e
parent: b2fd8b993a

@@ -1,6 +1,6 @@
 /*************************************************************************************
     Grid physics library, www.github.com/paboyle/Grid
     Source file: ./lib/communicator/Communicator_mpi.cc
@@ -35,7 +35,7 @@ Grid_MPI_Comm CartesianCommunicator::communicator_world;
 ////////////////////////////////////////////
 // First initialise of comms system
 ////////////////////////////////////////////
 void CartesianCommunicator::Init(int *argc, char ***argv)
 {
   int flag;
@@ -43,6 +43,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
   MPI_Initialized(&flag); // needed to coexist with other libs apparently
   if ( !flag ) {
+#if defined (TOFU) // hack for FUGAKU, credits go to Issaku Kanamori
+    nCommThreads=1;
+    MPI_Init(argc,argv);
+#else
     MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
     //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
@@ -53,6 +57,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
     if( (nCommThreads > 1) && (provided != MPI_THREAD_MULTIPLE) ) {
       assert(0);
     }
+#endif
   }
   // Never clean up as done once.
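
The hunks above give Init() two build-time paths: with -DTOFU defined, plain MPI_Init() is called and the code falls back to a single communication thread (this form of initialisation only guarantees MPI_THREAD_SINGLE); otherwise MPI_Init_thread() requests MPI_THREAD_MULTIPLE and the provided level is checked against the number of comms threads. Below is a minimal standalone sketch of that pattern, with nCommThreads as a stand-in global and the single-comms-thread check inferred from the comment in the hunk; it is illustrative, not Grid's actual Init().

// init_sketch.cc -- illustrative only; compile with an MPI C++ wrapper (e.g. mpicxx).
#include <mpi.h>
#include <cassert>

static int nCommThreads = 1;   // stand-in for Grid's comms-thread count

void InitComms(int *argc, char ***argv)
{
  int flag = 0;
  MPI_Initialized(&flag);      // coexist with other libraries that may have initialised MPI already
  if (!flag) {
#if defined (TOFU)
    // Fugaku/Tofu path: plain MPI_Init provides only MPI_THREAD_SINGLE,
    // so restrict the library to a single communication thread.
    nCommThreads = 1;
    MPI_Init(argc, argv);
#else
    int provided = 0;
    MPI_Init_thread(argc, argv, MPI_THREAD_MULTIPLE, &provided);
    // One comms thread needs any level above SINGLE;
    // several comms threads require full MPI_THREAD_MULTIPLE.
    if ((nCommThreads == 1) && (provided == MPI_THREAD_SINGLE))   assert(0);
    if ((nCommThreads > 1)  && (provided != MPI_THREAD_MULTIPLE)) assert(0);
#endif
  }
}

As the commit message states, the TOFU macro is expected to arrive from the compiler command line as a -DTOFU preprocessor define.
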
@@ -91,7 +96,7 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor)
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Initialises from communicator_world
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
 {
   MPI_Comm optimal_comm;
   ////////////////////////////////////////////////////
@@ -110,7 +115,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
 //////////////////////////////////
 // Try to subdivide communicator
 //////////////////////////////////
 CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank)
 {
   _ndimension = processors.size();  assert(_ndimension>=1);
   int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
@@ -127,7 +132,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   // split the communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   //  int Nparent = parent._processors ;
   int Nparent;
   MPI_Comm_size(parent.communicator,&Nparent);
@@ -149,13 +154,13 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
   }
   // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
   int crank;
   // Mpi uses the reverse Lexico convention to us; so reversed routines called
   Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
   Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);      // ssize is the number of split grids
   MPI_Comm comm_split;
   if ( Nchild > 1 ) {
     ////////////////////////////////////////////////////////////////
     // Split the communicator
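
The "reverse Lexico convention" noted in this hunk is MPI's row-major Cartesian ordering, in which the last coordinate varies fastest as ranks are enumerated. A self-contained sketch of that coordinate-to-rank mapping follows; the free function below is illustrative and not Grid's Lexicographic API, which returns the index through a reference argument.

// reverse_lexico_sketch.cc -- illustrative mapping only.
#include <iostream>
#include <vector>

// Rank from coordinate with the LAST dimension varying fastest (row-major),
// matching the convention MPI_Cart_rank uses.
int IndexFromCoorReversed(const std::vector<int>& coor, const std::vector<int>& dims)
{
  int index = 0;
  for (std::size_t d = 0; d < dims.size(); ++d)
    index = index * dims[d] + coor[d];   // most-significant dimension first
  return index;
}

int main()
{
  std::vector<int> dims = {2, 2, 2, 4};  // e.g. a 2x2x2x4 processor split
  std::vector<int> coor = {0, 1, 0, 3};
  std::cout << IndexFromCoorReversed(coor, dims) << std::endl;  // prints 11
  return 0;
}
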
@@ -180,11 +185,11 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
     SetCommunicator(comm_split);
     ///////////////////////////////////////////////
     // Free the temp communicator
     ///////////////////////////////////////////////
     MPI_Comm_free(&comm_split);
     if(0){
       std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
       for(int d=0;d<processors.size();d++){
         std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
@@ -245,7 +250,7 @@ CartesianCommunicator::~CartesianCommunicator()
     for(int i=0;i<communicator_halo.size();i++){
       MPI_Comm_free(&communicator_halo[i]);
     }
   }
 }
 void CartesianCommunicator::GlobalSum(uint32_t &u){
   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
@@ -310,7 +315,7 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
   if ( _processor == sender ) {
     MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
   }
   if ( _processor == receiver ) {
     MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
   }
 }
@@ -325,17 +330,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
   int myrank = _processor;
   int ierr;
   if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
     MPI_Request xrq;
     MPI_Request rrq;
     ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
     ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
     assert(ierr==0);
     list.push_back(xrq);
     list.push_back(rrq);
   } else {
     // Give the CPU to MPI immediately; can use threads to overlap optionally
     ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
                       recv,bytes,MPI_CHAR,from, from,
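
This hunk shows the two communication policies side by side: the concurrent branch posts nonblocking MPI_Irecv/MPI_Isend pairs and queues the requests for completion later, while the sequential branch hands the CPU to MPI immediately through a blocking MPI_Sendrecv. A standalone sketch of both patterns is given below; the function names are illustrative, and the request vector stands in for Grid's list of CommsRequest_t.

// exchange_sketch.cc -- illustrative only, not Grid's member functions.
#include <mpi.h>
#include <vector>
#include <cassert>

// Concurrent policy: post nonblocking receive and send, collect the requests
// so the caller can overlap work and complete them later with MPI_Waitall.
void ExchangeConcurrent(std::vector<MPI_Request> &list,
                        void *xmit, int dest, void *recv, int from,
                        int bytes, int myrank, MPI_Comm comm)
{
  MPI_Request xrq, rrq;
  int ierr  = MPI_Irecv(recv, bytes, MPI_CHAR, from, from,   comm, &rrq);
  ierr     |= MPI_Isend(xmit, bytes, MPI_CHAR, dest, myrank, comm, &xrq);
  assert(ierr == 0);
  list.push_back(xrq);
  list.push_back(rrq);
}

// Sequential policy: give the CPU to MPI at once via a blocking Sendrecv.
void ExchangeSequential(void *xmit, int dest, void *recv, int from,
                        int bytes, int myrank, MPI_Comm comm)
{
  MPI_Status stat;
  int ierr = MPI_Sendrecv(xmit, bytes, MPI_CHAR, dest, myrank,
                          recv, bytes, MPI_CHAR, from, from,
                          comm, &stat);
  assert(ierr == 0);
}
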
@@ -363,7 +368,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
                                                          int from,
                                                          int bytes,int dir)
 {
   int ncomm  =communicator_halo.size();
   int commdir=dir%ncomm;
   MPI_Request xrq;
@@ -393,7 +398,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
     off_node_bytes+=bytes;
   }
   if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
     this->StencilSendToRecvFromComplete(list,dir);
   }
@@ -432,8 +437,8 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
                      communicator);
   assert(ierr==0);
 }
 int CartesianCommunicator::RankWorld(void){
   int r;
   MPI_Comm_rank(communicator_world,&r);
   return r;
 }
@@ -466,7 +471,7 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
   // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
   // (Turns up on 32^3 x 64 Gparity too)
   MPI_Datatype object;
   int iwords;
   int ibytes;
   iwords = words;
   ibytes = bytes;
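
The "2x10^9 Y2K bug" comment refers to MPI's 32-bit int counts: for large lattices the product words*bytes can exceed INT_MAX, so the 64-bit sizes are demoted to int and the payload is wrapped in a derived datatype to keep the per-call count small. The helper below is a hedged sketch of that workaround and may differ in detail from the code that follows this hunk.

// alltoall_sketch.cc -- general workaround for 32-bit MPI counts; illustrative only.
#include <mpi.h>
#include <cassert>
#include <cstdint>

void AllToAllBytes(void *in, void *out, uint64_t words, uint64_t bytes, MPI_Comm comm)
{
  int iwords = static_cast<int>(words);   // per-rank element count
  int ibytes = static_cast<int>(bytes);   // element size in bytes
  assert(words == (uint64_t)iwords);      // the demotion must not truncate
  assert(bytes == (uint64_t)ibytes);

  MPI_Datatype object;
  MPI_Type_contiguous(ibytes, MPI_BYTE, &object);  // one "object" = bytes chars
  MPI_Type_commit(&object);
  MPI_Alltoall(in, iwords, object, out, iwords, object, comm);
  MPI_Type_free(&object);
}
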
@@ -479,5 +484,3 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
 }
 NAMESPACE_END(Grid);