/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/communicator/Communicator_mpi.cc

    Copyright (C) 2015

    Author: Peter Boyle

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#include "Grid.h"
#include <mpi.h>

namespace Grid {

// Global used by Init and nowhere else. How to hide?
int Rank(void) {
  int pe;
  MPI_Comm_rank(MPI_COMM_WORLD,&pe);
  return pe;
}

// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv)
{
  int flag;
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
  if ( !flag ) {
    MPI_Init(argc,argv);
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Want to implement some magic ... Group sub-cubes into those on same node
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
  std::vector<int> coor = _processor_coor;

  assert(std::abs(shift) <_processors[dim]);

  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
  Lexicographic::IndexFromCoor(coor,source,_processors);
  source = LexicographicToWorldRank[source];

  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
  Lexicographic::IndexFromCoor(coor,dest,_processors);
  dest = LexicographicToWorldRank[dest];
}

int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
  int rank;
  Lexicographic::IndexFromCoor(coor,rank,_processors);
  rank = LexicographicToWorldRank[rank];
  return rank;
}

void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
  Lexicographic::CoorFromIndex(coor,rank,_processors);
  rank = LexicographicToWorldRank[rank];
}

///////////////////////////////////////////////////////////////////////////////////////////////////
// Info that is setup once and indept of cartesian layout
///////////////////////////////////////////////////////////////////////////////////////////////////
int CartesianCommunicator::ShmSetup = 0;
int CartesianCommunicator::ShmRank;
int CartesianCommunicator::ShmSize;
int CartesianCommunicator::GroupRank;
int CartesianCommunicator::GroupSize;

MPI_Comm CartesianCommunicator::ShmComm;
MPI_Win  CartesianCommunicator::ShmWindow;

std::vector<int> CartesianCommunicator::GroupRanks;
std::vector<int> CartesianCommunicator::MyGroup;

CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  _ndimension = processors.size();
  WorldDims   = processors;

  communicator = MPI_COMM_WORLD;
  MPI_Comm_rank(communicator,&WorldRank);
  MPI_Comm_size(communicator,&WorldSize);
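  ///////////////////////////////////////////////////////////////////////////////////////////
  // World rank and size are now known. The rest of the constructor works out the node
  // topology: ranks sharing a node are gathered into an SHM group so that intra-node
  // stencil traffic can go through the shared-memory window rather than MPI point-to-point.
  ///////////////////////////////////////////////////////////////////////////////////////////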
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Plan: allocate a fixed SHM region. Scratch that is just used via some scheme
  // during stencil comms, with no allocate/free.
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Does every grid need one, or could we share across all grids via a singleton/guard?
  int ierr;
  if ( !ShmSetup ) {

    MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
    MPI_Comm_rank(ShmComm ,&ShmRank);
    MPI_Comm_size(ShmComm ,&ShmSize);
    GroupSize = WorldSize/ShmSize;

    /////////////////////////////////////////////////////////////////////
    // find world ranks in our SHM group (i.e. which ranks are on our node)
    /////////////////////////////////////////////////////////////////////
    MPI_Group WorldGroup, ShmGroup;
    MPI_Comm_group (communicator, &WorldGroup);
    MPI_Comm_group (ShmComm,      &ShmGroup);

    std::vector<int> world_ranks(WorldSize);
    GroupRanks.resize(WorldSize);
    MyGroup.resize(ShmSize);
    for(int r=0;r<WorldSize;r++) world_ranks[r]=r;

    // GroupRanks[r] is r's rank within this node's group, or MPI_UNDEFINED if r is on another node
    MPI_Group_translate_ranks(WorldGroup,WorldSize,&world_ranks[0],ShmGroup,&GroupRanks[0]);

    // Collect the world ranks that live on this node; sort so that MyGroup[0] can
    // serve as the node's leader rank
    int g=0;
    for(int r=0;r<WorldSize;r++){
      if ( GroupRanks[r] != MPI_UNDEFINED ) {
        assert(g<ShmSize);
        MyGroup[g++] = r;
      }
    }
    std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
    int myleader = MyGroup[0];

    std::vector<int> leaders_1hot(WorldSize,0);
    std::vector<int> leaders_group(GroupSize,0);
    leaders_1hot [ myleader ] = 1;

    ///////////////////////////////////////////////////////////////////
    // global sum leaders over comm world
    ///////////////////////////////////////////////////////////////////
    ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
    assert(ierr==0);

    ///////////////////////////////////////////////////////////////////
    // find the group leaders world rank
    ///////////////////////////////////////////////////////////////////
    int group=0;
    for(int l=0;l<WorldSize;l++){
      if ( leaders_1hot[l] ) {
        leaders_group[group++] = l;
      }
    }

    // ... (remainder of the one-time SHM setup is not reproduced here: identification of
    //      GroupRank from the leader table and allocation of the shared-memory window
    //      ShmWindow / ShmCommBufs, after which ShmSetup is flagged)
  }

  // ... (the Cartesian decomposition of this grid - _processors, _processor_coor and the
  //      LexicographicToWorldRank table used above - is completed here)
}

void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                           int dest,
                                           void *recv,
                                           int from,
                                           int bytes)
{
  std::vector<CommsRequest_t> reqs(0);
  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
  SendToRecvFromComplete(reqs);
}

void CartesianCommunicator::SendRecvPacket(void *xmit,
                                           void *recv,
                                           int sender,
                                           int receiver,
                                           int bytes)
{
  MPI_Status stat;
  assert(sender != receiver);
  int tag = sender;
  if ( _processor == sender ) {
    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
  }
  if ( _processor == receiver ) {
    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
  }
}

// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                void *xmit,
                                                int dest,
                                                void *recv,
                                                int from,
                                                int bytes)
{
#undef SHM_USE_BCOPY
  MPI_Request xrq;
  MPI_Request rrq;
  static int sequence;
  int rank = _processor;
  int ierr;
  int tag;
  int check;

  assert(dest != _processor);
  assert(from != _processor);

  int gdest = GroupRanks[dest];
  int gme   = GroupRanks[_processor];

  sequence++;

  char *to_ptr   = (char *)ShmCommBufs[gdest];
  char *from_ptr = (char *)ShmCommBufs[ShmRank];

  int small = (bytes<MAX_MPI_SHM_BYTES);  // small enough to stage through the fixed SHM scratch region

#ifndef SHM_USE_BCOPY
  int words = bytes>>2;
  assert(((size_t)bytes &0x3)==0);
  assert(((size_t)xmit  &0x3)==0);
  assert(((size_t)recv  &0x3)==0);
#endif

  assert(gme == ShmRank);

  if ( small && (dest !=MPI_UNDEFINED) ) {

    assert(gme != gdest);

    float *ip = (float *)xmit;
    float *op = (float *)to_ptr;
#ifdef SHM_USE_BCOPY
    bcopy(ip,op,bytes);
#else
PARALLEL_FOR_LOOP
    for(int w=0;w<words;w++) {
      op[w] = ip[w];
    }
#endif
    // ... (remainder of the shared-memory path is not reproduced here)
  }
  // ... (the fallback path, which posts asynchronous MPI sends/receives and records the
  //      requests in "list" for SendToRecvFromComplete, is not reproduced here)
}

void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  int nreq=list.size();
  std::vector<MPI_Status> status(nreq);
  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
  assert(ierr==0);
}
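///////////////////////////////////////////////////////////////////////////////////////////////////
// Collectives: Barrier and Broadcast act over this grid's Cartesian communicator;
// BroadcastWorld deliberately spans all of MPI_COMM_WORLD.
///////////////////////////////////////////////////////////////////////////////////////////////////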
void CartesianCommunicator::Barrier(void)
{
  int ierr = MPI_Barrier(communicator);
  assert(ierr==0);
}

void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
  int ierr=MPI_Bcast(data, bytes, MPI_BYTE, root, communicator);
  assert(ierr==0);
}

void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
  int ierr= MPI_Bcast(data,
                      bytes,
                      MPI_BYTE,
                      root,
                      MPI_COMM_WORLD);
  assert(ierr==0);
}

}
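///////////////////////////////////////////////////////////////////////////////////////////////////
// Illustration only (not compiled): a minimal sketch of how the primitives above combine into a
// nearest-neighbour halo exchange. The function name, the caller-owned buffers and the loop over
// dimensions are assumptions for the example, not part of the library.
///////////////////////////////////////////////////////////////////////////////////////////////////
#if 0
void HaloExchangeSketch(Grid::CartesianCommunicator &grid,
                        int ndimension,      // number of dimensions in the processor grid (assumed > 0)
                        void *send_buf,      // caller-allocated face buffer to transmit
                        void *recv_buf,      // caller-allocated face buffer to receive into
                        int bytes)
{
  for(int dim=0;dim<ndimension;dim++){
    int source,dest;
    // world ranks we receive from (source) and send to (dest) for a unit shift in this dimension;
    // requires more than one rank in the dimension, per the assert in ShiftedRanks
    grid.ShiftedRanks(dim,1,source,dest);

    // blocking begin+complete pair wrapping the halo transfer
    grid.SendToRecvFrom(send_buf,dest,recv_buf,source,bytes);
  }
  grid.Barrier();
}
#endif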