mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
MPI_THREAD_SINGLE hack for Fugaku, enabled by -DTOFU
This commit is contained in:
parent
b2fd8b993a
commit
ffaaed679e
@ -1,6 +1,6 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/communicator/Communicator_mpi.cc
|
Source file: ./lib/communicator/Communicator_mpi.cc
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ Grid_MPI_Comm CartesianCommunicator::communicator_world;
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// First initialise of comms system
|
// First initialise of comms system
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
void CartesianCommunicator::Init(int *argc, char ***argv)
|
void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||||
{
|
{
|
||||||
|
|
||||||
int flag;
|
int flag;
|
||||||
@ -43,6 +43,10 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
|
|||||||
|
|
||||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
if ( !flag ) {
|
if ( !flag ) {
|
||||||
|
#if defined (TOFU) // hack for FUGAKU, credits go to Issaku Kanamori
|
||||||
|
nCommThreads=1;
|
||||||
|
MPI_Init(argc,argv);
|
||||||
|
#else
|
||||||
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
|
||||||
|
|
||||||
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
//If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
|
||||||
@ -53,6 +57,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
|
|||||||
if( (nCommThreads > 1) && (provided != MPI_THREAD_MULTIPLE) ) {
|
if( (nCommThreads > 1) && (provided != MPI_THREAD_MULTIPLE) ) {
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Never clean up as done once.
|
// Never clean up as done once.
|
||||||
@ -91,7 +96,7 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor)
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Initialises from communicator_world
|
// Initialises from communicator_world
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
|
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
|
||||||
{
|
{
|
||||||
MPI_Comm optimal_comm;
|
MPI_Comm optimal_comm;
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
@ -110,7 +115,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
|
|||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
// Try to subdivide communicator
|
// Try to subdivide communicator
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank)
|
CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank)
|
||||||
{
|
{
|
||||||
_ndimension = processors.size(); assert(_ndimension>=1);
|
_ndimension = processors.size(); assert(_ndimension>=1);
|
||||||
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
|
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
|
||||||
@ -127,7 +132,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// split the communicator
|
// split the communicator
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// int Nparent = parent._processors ;
|
// int Nparent = parent._processors ;
|
||||||
int Nparent;
|
int Nparent;
|
||||||
MPI_Comm_size(parent.communicator,&Nparent);
|
MPI_Comm_size(parent.communicator,&Nparent);
|
||||||
|
|
||||||
@ -149,13 +154,13 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
|||||||
}
|
}
|
||||||
|
|
||||||
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
|
// rank within subcomm ; srank is rank of subcomm within blocks of subcomms
|
||||||
int crank;
|
int crank;
|
||||||
// Mpi uses the reverse Lexico convention to us; so reversed routines called
|
// Mpi uses the reverse Lexico convention to us; so reversed routines called
|
||||||
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
|
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions
|
||||||
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids
|
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids
|
||||||
|
|
||||||
MPI_Comm comm_split;
|
MPI_Comm comm_split;
|
||||||
if ( Nchild > 1 ) {
|
if ( Nchild > 1 ) {
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
// Split the communicator
|
// Split the communicator
|
||||||
@ -180,11 +185,11 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
|
|||||||
SetCommunicator(comm_split);
|
SetCommunicator(comm_split);
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Free the temp communicator
|
// Free the temp communicator
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
MPI_Comm_free(&comm_split);
|
MPI_Comm_free(&comm_split);
|
||||||
|
|
||||||
if(0){
|
if(0){
|
||||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||||
for(int d=0;d<processors.size();d++){
|
for(int d=0;d<processors.size();d++){
|
||||||
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
|
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
|
||||||
@ -245,7 +250,7 @@ CartesianCommunicator::~CartesianCommunicator()
|
|||||||
for(int i=0;i<communicator_halo.size();i++){
|
for(int i=0;i<communicator_halo.size();i++){
|
||||||
MPI_Comm_free(&communicator_halo[i]);
|
MPI_Comm_free(&communicator_halo[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
@ -310,7 +315,7 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
|||||||
if ( _processor == sender ) {
|
if ( _processor == sender ) {
|
||||||
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||||
}
|
}
|
||||||
if ( _processor == receiver ) {
|
if ( _processor == receiver ) {
|
||||||
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -325,17 +330,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
|||||||
int myrank = _processor;
|
int myrank = _processor;
|
||||||
int ierr;
|
int ierr;
|
||||||
|
|
||||||
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
|
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
|
||||||
MPI_Request xrq;
|
MPI_Request xrq;
|
||||||
MPI_Request rrq;
|
MPI_Request rrq;
|
||||||
|
|
||||||
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
list.push_back(xrq);
|
list.push_back(xrq);
|
||||||
list.push_back(rrq);
|
list.push_back(rrq);
|
||||||
} else {
|
} else {
|
||||||
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
// Give the CPU to MPI immediately; can use threads to overlap optionally
|
||||||
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
|
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
|
||||||
recv,bytes,MPI_CHAR,from, from,
|
recv,bytes,MPI_CHAR,from, from,
|
||||||
@ -363,7 +368,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
|||||||
int from,
|
int from,
|
||||||
int bytes,int dir)
|
int bytes,int dir)
|
||||||
{
|
{
|
||||||
int ncomm =communicator_halo.size();
|
int ncomm =communicator_halo.size();
|
||||||
int commdir=dir%ncomm;
|
int commdir=dir%ncomm;
|
||||||
|
|
||||||
MPI_Request xrq;
|
MPI_Request xrq;
|
||||||
@ -393,7 +398,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
|||||||
off_node_bytes+=bytes;
|
off_node_bytes+=bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||||
this->StencilSendToRecvFromComplete(list,dir);
|
this->StencilSendToRecvFromComplete(list,dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -432,8 +437,8 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
|||||||
communicator);
|
communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
int CartesianCommunicator::RankWorld(void){
|
int CartesianCommunicator::RankWorld(void){
|
||||||
int r;
|
int r;
|
||||||
MPI_Comm_rank(communicator_world,&r);
|
MPI_Comm_rank(communicator_world,&r);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
@ -466,7 +471,7 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
|
|||||||
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
||||||
// (Turns up on 32^3 x 64 Gparity too)
|
// (Turns up on 32^3 x 64 Gparity too)
|
||||||
MPI_Datatype object;
|
MPI_Datatype object;
|
||||||
int iwords;
|
int iwords;
|
||||||
int ibytes;
|
int ibytes;
|
||||||
iwords = words;
|
iwords = words;
|
||||||
ibytes = bytes;
|
ibytes = bytes;
|
||||||
@ -479,5 +484,3 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user