mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Simplify communicator case proliferation
This commit is contained in:
parent
0091eec23a
commit
357badce5e
@ -36,33 +36,9 @@ namespace Grid {
|
|||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Info that is setup once and indept of cartesian layout
|
// Info that is setup once and indept of cartesian layout
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Base address of the communication buffer. In this file it is assigned by
// ShmInitGeneric() (from mmap) and handed back by ShmBufferSelf().
void * CartesianCommunicator::ShmCommBuf;
|
|
||||||
// Size in bytes of the communication buffer; the default is 1 GiB (1024^3).
// ShmBufferMalloc() refuses requests that would reach this limit and its
// diagnostic refers to a "--shm <MB>" flag for raising it.
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL;
|
|
||||||
// Communication policy; initialised to CommunicatorPolicyConcurrent.
CartesianCommunicator::CommunicatorPolicy_t
|
CartesianCommunicator::CommunicatorPolicy_t
|
||||||
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
|
CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
|
||||||
// Number of communication threads; initialised to -1.
// NOTE(review): -1 presumably means "not set / choose automatically" -- confirm against the users of this variable.
int CartesianCommunicator::nCommThreads = -1;
|
int CartesianCommunicator::nCommThreads = -1;
|
||||||
// When non-zero, ShmInitGeneric() adds MAP_HUGETLB to the mmap flags (where
// that flag exists); when zero it issues madvise(MADV_HUGEPAGE) instead.
int CartesianCommunicator::Hugepages = 0;
|
|
||||||
|
|
||||||
/////////////////////////////////
|
|
||||||
// Alloc, free shmem region
|
|
||||||
/////////////////////////////////
|
|
||||||
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
|
|
||||||
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
|
||||||
void *ptr = (void *)heap_top;
|
|
||||||
heap_top += bytes;
|
|
||||||
heap_bytes+= bytes;
|
|
||||||
if (heap_bytes >= MAX_MPI_SHM_BYTES) {
|
|
||||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
|
||||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
|
||||||
std::cout<< " Current value is " << (MAX_MPI_SHM_BYTES/(1024*1024)) <<std::endl;
|
|
||||||
assert(heap_bytes<MAX_MPI_SHM_BYTES);
|
|
||||||
}
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::ShmBufferFreeAll(void) {
|
|
||||||
heap_top =(size_t)ShmBufferSelf();
|
|
||||||
heap_bytes=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
// Grid information queries
|
// Grid information queries
|
||||||
@ -95,270 +71,6 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
|
|||||||
{
|
{
|
||||||
GlobalSumVector((double *)c,2*N);
|
GlobalSumVector((double *)c,2*N);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
|
|
||||||
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
|
|
||||||
{
|
|
||||||
std::vector<int> row(_ndimension,1);
|
|
||||||
assert(dim>=0 && dim<_ndimension);
|
|
||||||
|
|
||||||
// Split the communicator
|
|
||||||
row[dim] = _processors[dim];
|
|
||||||
|
|
||||||
int me;
|
|
||||||
CartesianCommunicator Comm(row,*this,me);
|
|
||||||
Comm.AllToAll(in,out,words,bytes);
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
|
|
||||||
{
|
|
||||||
// MPI is a pain and uses "int" arguments
|
|
||||||
// 64*64*64*128*16 == 500Million elements of data.
|
|
||||||
// When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug.
|
|
||||||
// (Turns up on 32^3 x 64 Gparity too)
|
|
||||||
MPI_Datatype object;
|
|
||||||
int iwords;
|
|
||||||
int ibytes;
|
|
||||||
iwords = words;
|
|
||||||
ibytes = bytes;
|
|
||||||
assert(words == iwords); // safe to cast to int ?
|
|
||||||
assert(bytes == ibytes); // safe to cast to int ?
|
|
||||||
MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
|
|
||||||
MPI_Type_commit(&object);
|
|
||||||
MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
|
|
||||||
MPI_Type_free(&object);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Sub-communicator constructor: divide the parent's ranks into equal blocks of shape `processors`
// and set this object up on the block that contains the calling rank.
//
//   processors : extent of the child processor grid in each dimension. It may have more dimensions
//                than the parent; the parent layout is then padded on the left with extent 1.
//   parent     : communicator whose ranks are being divided.
//   srank      : [out] index of the block (split) this rank belongs to; 0 when no split is needed.
//
// Every child extent must be positive and must divide the corresponding (padded) parent extent.
// This is asserted up front: the per-dimension divisions and modulo below are otherwise undefined
// (zero extent) or silently truncate, and the product-only check further down does not catch
// per-dimension mismatches.
//////////////////////////////////////////////////////////////////////////////////////////////////////
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
  _ndimension = processors.size();

  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
  std::vector<int> parent_processor_coor(_ndimension,0);
  std::vector<int> parent_processors    (_ndimension,1);

  // Can make 5d grid from 4d etc...
  int pad = _ndimension-parent_ndimension;
  for(int d=0;d<parent_ndimension;d++){
    parent_processor_coor[pad+d]=parent._processor_coor[d];
    parent_processors    [pad+d]=parent._processors[d];
  }

  // Validate the requested layout before dividing by it
  for(int d=0;d<_ndimension;d++){
    assert(processors[d] > 0);
    assert(parent_processors[d] % processors[d] == 0);
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // split the communicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  int Nparent;
  MPI_Comm_size(parent.communicator,&Nparent);

  int childsize=1;
  for(int d=0;d<_ndimension;d++) {
    childsize *= processors[d];
  }
  int Nchild = Nparent/childsize;
  assert (childsize * Nchild == Nparent);

  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
  std::vector<int> scoor(_ndimension); // coor of split within parent
  std::vector<int> ssize(_ndimension); // number of splits along each dimension of the parent

  for(int d=0;d<_ndimension;d++){
    ccoor[d] = parent_processor_coor[d] % processors[d];
    scoor[d] = parent_processor_coor[d] / processors[d];
    ssize[d] = parent_processors[d]     / processors[d];
  }
  int crank;  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
  // Mpi uses the reverse Lexico convention to us
  Lexicographic::IndexFromCoorReversed(ccoor,crank,processors);
  Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);

  MPI_Comm comm_split;
  if ( Nchild > 1 ) {

    if(0){ // debugging output, disabled
      std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
      std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processors[d] << " ";
      std::cout<<std::endl;

      std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
      for(int d=0;d<_ndimension;d++)  std::cout << processors[d] << " ";
      std::cout<<std::endl;

      std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processor_coor[d] << " ";
      std::cout<<std::endl;

      std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
      for(int d=0;d<_ndimension;d++)  std::cout << scoor[d] << " ";
      std::cout<<std::endl;

      std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
      for(int d=0;d<_ndimension;d++)  std::cout << ccoor[d] << " ";
      std::cout<<std::endl;
    }

    // Ranks sharing the same srank (colour) land in the same child; crank orders them inside it
    int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
    assert(ierr==0);
    //////////////////////////////////////////////////////////////////////////////////////////////////////
    // Declare victory
    //////////////////////////////////////////////////////////////////////////////////////////////////////
    //    std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
    //              << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
  } else {
    // The child covers the whole parent: reuse its communicator, there is only one split
    comm_split=parent.communicator;
    srank = 0;
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // Set up from the new split communicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  InitFromMPICommunicator(processors,comm_split);

  if(0){ // debugging output, disabled
    std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
    for(int d=0;d<_ndimension;d++){
      std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
    }
  }
  // The coordinates reported by the new communicator must agree with the ones computed above
  for(int d=0;d<_ndimension;d++){
    assert(_processor_coor[d] == ccoor[d] );
  }

}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Take an MPI_Comm and self assemble
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
|
|
||||||
{
|
|
||||||
_ndimension = processors.size();
|
|
||||||
_processor_coor.resize(_ndimension);
|
|
||||||
|
|
||||||
/////////////////////////////////
|
|
||||||
// Count the requested nodes
|
|
||||||
/////////////////////////////////
|
|
||||||
_Nprocessors=1;
|
|
||||||
_processors = processors;
|
|
||||||
for(int i=0;i<_ndimension;i++){
|
|
||||||
_Nprocessors*=_processors[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int> periodic(_ndimension,1);
|
|
||||||
MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
|
|
||||||
MPI_Comm_rank(communicator,&_processor);
|
|
||||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
|
||||||
|
|
||||||
if ( 0 && (communicator_base != communicator_world) ) {
|
|
||||||
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
|
|
||||||
|
|
||||||
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
|
|
||||||
for(int d=0;d<_processors.size();d++){
|
|
||||||
std::cout << _processor_coor[d]<<" ";
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Size;
|
|
||||||
MPI_Comm_size(communicator,&Size);
|
|
||||||
|
|
||||||
#if defined(GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
|
|
||||||
communicator_halo.resize (2*_ndimension);
|
|
||||||
for(int i=0;i<_ndimension*2;i++){
|
|
||||||
MPI_Comm_dup(communicator,&communicator_halo[i]);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
assert(Size==_Nprocessors);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
|
|
||||||
// Construct a communicator with the given processor layout directly on top
// of communicator_world; all set-up is delegated to InitFromMPICommunicator.
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  InitFromMPICommunicator(processors,communicator_world);
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined( GRID_COMMS_MPI3)
|
|
||||||
// Non-MPI3 builds: the node count is simply ProcessorCount().
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
|
|
||||||
// Non-MPI3 builds: the rank count is simply ProcessorCount().
int CartesianCommunicator::RankCount(void) { return ProcessorCount();};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT)
|
|
||||||
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
|
|
||||||
int xmit_to_rank,
|
|
||||||
void *recv,
|
|
||||||
int recv_from_rank,
|
|
||||||
int bytes, int dir)
|
|
||||||
{
|
|
||||||
std::vector<CommsRequest_t> list;
|
|
||||||
// Discard the "dir"
|
|
||||||
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
|
||||||
SendToRecvFromComplete(list);
|
|
||||||
return 2.0*bytes;
|
|
||||||
}
|
|
||||||
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
|
||||||
void *xmit,
|
|
||||||
int xmit_to_rank,
|
|
||||||
void *recv,
|
|
||||||
int recv_from_rank,
|
|
||||||
int bytes, int dir)
|
|
||||||
{
|
|
||||||
// Discard the "dir"
|
|
||||||
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
|
||||||
return 2.0*bytes;
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
|
|
||||||
{
|
|
||||||
SendToRecvFromComplete(waitall);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined( GRID_COMMS_MPI3)
|
|
||||||
|
|
||||||
// Non-MPI3 builds: the stencil barrier is a no-op.
void CartesianCommunicator::StencilBarrier(void){};
|
|
||||||
|
|
||||||
// Backing storage referenced only by the currently disabled (#else) branch of
// ShmInitGeneric(); the active branch obtains the buffer from mmap instead.
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
|
|
||||||
|
|
||||||
// Non-MPI3 builds: this rank's buffer is the single region held in ShmCommBuf.
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
|
|
||||||
|
|
||||||
// Non-MPI3 builds: always returns NULL, whatever `rank` is.
void *CartesianCommunicator::ShmBuffer(int rank)
{
  return NULL;
}
|
|
||||||
// Non-MPI3 builds: always returns NULL; neither `rank` nor `local_p` is used.
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
{
  return NULL;
}
|
|
||||||
void CartesianCommunicator::ShmInitGeneric(void){
|
|
||||||
#if 1
|
|
||||||
int mmap_flag =0;
|
|
||||||
#ifdef MAP_ANONYMOUS
|
|
||||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
|
|
||||||
#endif
|
|
||||||
#ifdef MAP_ANON
|
|
||||||
mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
|
|
||||||
#endif
|
|
||||||
#ifdef MAP_HUGETLB
|
|
||||||
if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
|
|
||||||
#endif
|
|
||||||
ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
|
|
||||||
if (ShmCommBuf == (void *)MAP_FAILED) {
|
|
||||||
perror("mmap failed ");
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
#ifdef MADV_HUGEPAGE
|
|
||||||
if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE);
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
|
|
||||||
ShmCommBuf=(void *)&ShmBufStorageVector[0];
|
|
||||||
#endif
|
|
||||||
bzero(ShmCommBuf,MAX_MPI_SHM_BYTES);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user