1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 12:47:05 +01:00

Merge branch 'develop' into feature/scalar_adjointFT

# Conflicts:
#	lib/communicator/Communicator_mpi3.cc
This commit is contained in:
2017-11-11 18:09:55 +00:00
45 changed files with 2099 additions and 1505 deletions

View File

@ -97,9 +97,41 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
}
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
// Split the communicator
row[dim] = _processors[dim];
int me;
CartesianCommunicator Comm(row,*this,me);
Comm.AllToAll(in,out,words,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
// MPI is a pain and uses "int" arguments
// 64*64*64*128*16 == 500Million elements of data.
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
// (Turns up on 32^3 x 64 Gparity too)
MPI_Datatype object;
int iwords;
int ibytes;
iwords = words;
ibytes = bytes;
assert(words == iwords); // safe to cast to int ?
assert(bytes == ibytes); // safe to cast to int ?
MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
MPI_Type_commit(&object);
MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
MPI_Type_free(&object);
}
#endif
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
_ndimension = processors.size();
assert(_ndimension = parent._ndimension);
@ -124,33 +156,51 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent._processor_coor[d] % processors[d];
scoor[d] = parent._processor_coor[d] / processors[d];
ssize[d] = parent._processors[d]/ processors[d];
ssize[d] = parent._processors[d] / processors[d];
}
int crank,srank; // rank within subcomm ; rank of subcomm within blocks of subcomms
Lexicographic::IndexFromCoor(ccoor,crank,processors);
Lexicographic::IndexFromCoor(scoor,srank,ssize);
int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
// Mpi uses the reverse Lexico convention to us
Lexicographic::IndexFromCoorReversed(ccoor,crank,processors);
Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);
MPI_Comm comm_split;
if ( Nchild > 1 ) {
// std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
// std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
// for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
// std::cout<<std::endl;
/*
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
// std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
// for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
// std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
*/
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
// std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
// << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
/*
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
*/
} else {
comm_split=parent.communicator;
srank = 0;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -158,14 +208,12 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take an MPI_Comm and self assemble
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
// if ( communicator_base != communicator_world ) {
// std::cout << "Cartesian communicator created with a non-world communicator"<<std::endl;
// }
_ndimension = processors.size();
_processor_coor.resize(_ndimension);
@ -179,14 +227,24 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
}
std::vector<int> periodic(_ndimension,1);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],1,&communicator);
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
if ( communicator_base != communicator_world ) {
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
for(int d=0;d<_processors.size();d++){
std::cout << _processor_coor[d]<<" ";
}
std::cout << std::endl;
}
int Size;
MPI_Comm_size(communicator,&Size);
#ifdef GRID_COMMS_MPIT
#if defined(GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
communicator_halo.resize (2*_ndimension);
for(int i=0;i<_ndimension*2;i++){
MPI_Comm_dup(communicator,&communicator_halo[i]);
@ -195,7 +253,9 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
assert(Size==_Nprocessors);
}
#endif
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
InitFromMPICommunicator(processors,communicator_world);
@ -204,10 +264,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
#endif
#if !defined( GRID_COMMS_MPI3)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
int CartesianCommunicator::RankCount(void) { return ProcessorCount();};
#endif
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT)
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int xmit_to_rank,

View File

@ -153,12 +153,12 @@ class CartesianCommunicator {
// Constructors to sub-divide a parent communicator
// and default to comm world
////////////////////////////////////////////////
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent);
CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank);
CartesianCommunicator(const std::vector<int> &pdimensions_in);
virtual ~CartesianCommunicator();
private:
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
////////////////////////////////////////////////
// Private initialise from an MPI communicator
// Can use after an MPI_Comm_split, but hidden from user so private
@ -275,12 +275,16 @@ class CartesianCommunicator {
// std::cerr << " AllToAll in.size() "<<in.size()<<std::endl;
// std::cerr << " AllToAll out.size() "<<out.size()<<std::endl;
assert(in.size()==out.size());
size_t bytes=(in.size()*sizeof(T))/numnode;
assert((bytes*numnode) == in.size()*sizeof(T));
AllToAll(dim,(void *)&in[0],(void *)&out[0],bytes);
uint64_t bytes=sizeof(T);
uint64_t words=in.size()/numnode;
assert(numnode * words == in.size());
assert(words < (1ULL<<32));
AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes);
}
void AllToAll(int dim ,void *in,void *out,int bytes);
void AllToAll(void *in,void *out,int bytes);
void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes);
void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes);
template<class obj> void Broadcast(int root,obj &data)
{

View File

@ -55,7 +55,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
CartesianCommunicator::~CartesianCommunicator()
{
if (communicator && !MPI::Is_finalized())
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised)
MPI_Comm_free(&communicator);
}
@ -194,21 +196,6 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
root,
communicator);
assert(ierr==0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
{
std::vector<int> row(_ndimension,1);
assert(dim>=0 && dim<_ndimension);
// Split the communicator
row[dim] = _processors[dim];
CartesianCommunicator Comm(row,*this);
Comm.AllToAll(in,out,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,int bytes)
{
MPI_Alltoall(in ,bytes,MPI_BYTE,out,bytes,MPI_BYTE,communicator);
}
///////////////////////////////////////////////////////
// Should only be used prior to Grid Init finished.

View File

@ -454,11 +454,15 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
//////////////////////////////////
// Try to subdivide communicator
//////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
/*
* Use default in MPI compile
*/
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors)
{
std::cout << "Attempts to split MPI3 communicators will fail until implemented" <<std::endl;
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
int ierr;
@ -596,9 +600,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
}
}
};
CartesianCommunicator::~CartesianCommunicator() = default;
CartesianCommunicator::~CartesianCommunicator()
{
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised) {
MPI_Comm_free(&communicator);
for(int i=0;i< communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -715,7 +727,8 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
int from,
int bytes,int dir)
{
assert(dir < communicator_halo.size());
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
MPI_Request xrq;
MPI_Request rrq;
@ -735,14 +748,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
gfrom = MPI_UNDEFINED;
#endif
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[dir],&rrq);
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator_halo[commdir],&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[dir],&xrq);
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator_halo[commdir],&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;

View File

@ -55,11 +55,16 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
CartesianCommunicator::~CartesianCommunicator()
{
if (communicator && !MPI::Is_finalized())
int MPI_is_finalised;
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised){
MPI_Comm_free(&communicator);
for(int i=0;i< communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}
}
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -224,13 +229,14 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
{
int myrank = _processor;
int ierr;
assert(dir < communicator_halo.size());
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
// Give the CPU to MPI immediately; can use threads to overlap optionally
MPI_Request req[2];
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]);
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[commdir],&req[0]);
list.push_back(req[0]);
list.push_back(req[1]);
@ -240,7 +246,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
{
int nreq=waitall.size();
MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE);
};
}
double CartesianCommunicator::StencilSendToRecvFrom(void *xmit,
int xmit_to_rank,
void *recv,
@ -249,13 +255,14 @@ double CartesianCommunicator::StencilSendToRecvFrom(void *xmit,
{
int myrank = _processor;
int ierr;
assert(dir < communicator_halo.size());
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo[dir]<<std::endl;
// std::cout << " sending on communicator "<<dir<<" " <<communicator_halo.size()<< <std::endl;
int ncomm =communicator_halo.size();
int commdir=dir%ncomm;
// Give the CPU to MPI immediately; can use threads to overlap optionally
MPI_Request req[2];
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[dir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[dir],&req[0]);
MPI_Irecv(recv,bytes,MPI_CHAR,recv_from_rank,recv_from_rank, communicator_halo[commdir],&req[1]);
MPI_Isend(xmit,bytes,MPI_CHAR,xmit_to_rank ,myrank , communicator_halo[commdir],&req[0]);
MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
return 2.0*bytes;
}

View File

@ -38,8 +38,8 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
ShmInitGeneric();
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors) {}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
: CartesianCommunicator(processors) { srank=0;}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
@ -100,9 +100,13 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,int bytes)
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes);
bcopy(in,out,bytes*words);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
bcopy(in,out,bytes*words);
}
int CartesianCommunicator::RankWorld(void){return 0;}

View File

@ -75,6 +75,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
ShmInitGeneric();
}
CartesianCommunicator::~CartesianCommunicator(){}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent)
: CartesianCommunicator(processors)
{