/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/communicator/Communicator_base.h Copyright (C) 2015 Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_COMMUNICATOR_BASE_H #define GRID_COMMUNICATOR_BASE_H /////////////////////////////////// // Processor layout information /////////////////////////////////// #ifdef GRID_COMMS_MPI #include #endif #ifdef GRID_COMMS_MPI3 #include #endif #ifdef GRID_COMMS_SHMEM #include #endif namespace Grid { class CartesianCommunicator { public: // 65536 ranks per node adequate for now // 128MB shared memory for comms enought for 48^4 local vol comms // Give external control (command line override?) of this static const int MAXLOG2RANKSPERNODE = 16; static const uint64_t MAX_MPI_SHM_BYTES = 128*1024*1024; // Communicator should know nothing of the physics grid, only processor grid. int _Nprocessors; // How many in all std::vector _processors; // Which dimensions get relayed out over processors lanes. int _processor; // linear processor rank std::vector _processor_coor; // linear processor coordinate unsigned long _ndimension; #if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) MPI_Comm communicator; static MPI_Comm communicator_world; typedef MPI_Request CommsRequest_t; #else typedef int CommsRequest_t; #endif //////////////////////////////////////////////////////////////////// // Helper functionality for SHM Windows common to all other impls //////////////////////////////////////////////////////////////////// // Longer term; drop this in favour of a master / slave model with // cartesian communicator on a subset of ranks, slave ranks controlled // by group leader with data xfer via shared memory //////////////////////////////////////////////////////////////////// #ifdef GRID_COMMS_MPI3 std::vector WorldDims; std::vector GroupDims; std::vector ShmDims; std::vector GroupCoor; std::vector ShmCoor; std::vector WorldCoor; static std::vector GroupRanks; static std::vector MyGroup; static int ShmSetup; static MPI_Win ShmWindow; static MPI_Comm ShmComm; std::vector LexicographicToWorldRank; static std::vector ShmCommBufs; #else static void ShmInitGeneric(void); static commVector ShmBufStorageVector; #endif static void * ShmCommBuf; size_t heap_top; size_t heap_bytes; void *ShmBufferSelf(void); void *ShmBuffer(int rank); void *ShmBufferTranslate(int rank,void * local_p); void *ShmBufferMalloc(size_t bytes); void ShmBufferFreeAll(void) ; //////////////////////////////////////////////// // Must call in Grid startup //////////////////////////////////////////////// static void Init(int *argc, char ***argv); //////////////////////////////////////////////// // Constructor of any given grid //////////////////////////////////////////////// CartesianCommunicator(const std::vector &pdimensions_in); //////////////////////////////////////////////////////////////////////////////////////// // Wraps MPI_Cart routines, or implements equivalent on other impls //////////////////////////////////////////////////////////////////////////////////////// void ShiftedRanks(int dim,int shift,int & source, int & dest); int RankFromProcessorCoor(std::vector &coor); void ProcessorCoorFromRank(int rank,std::vector &coor); ///////////////////////////////// // Grid information and queries ///////////////////////////////// static int ShmRank; static int ShmSize; static int GroupSize; static int GroupRank; static int WorldRank; static int WorldSize; static int Slave; int IsBoss(void) ; int BossRank(void) ; int ThisRank(void) ; const std::vector & ThisProcessorCoor(void) ; const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; static int Ranks (void); static int Nodes (void); static int Cores (void); static int NodeRank (void); static int CoreRank (void); //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid //////////////////////////////////////////////////////////////////////////////// static int RankWorld(void) ; static void BroadcastWorld(int root,void* data, int bytes); //////////////////////////////////////////////////////////// // Reduction //////////////////////////////////////////////////////////// void GlobalSum(RealF &); void GlobalSumVector(RealF *,int N); void GlobalSum(RealD &); void GlobalSumVector(RealD *,int N); void GlobalSum(uint32_t &); void GlobalSum(uint64_t &); void GlobalSum(ComplexF &c); void GlobalSumVector(ComplexF *c,int N); void GlobalSum(ComplexD &c); void GlobalSumVector(ComplexD *c,int N); template void GlobalSum(obj &o){ typedef typename obj::scalar_type scalar_type; int words = sizeof(obj)/sizeof(scalar_type); scalar_type * ptr = (scalar_type *)& o; GlobalSumVector(ptr,words); } //////////////////////////////////////////////////////////// // Face exchange, buffer swap in translational invariant way //////////////////////////////////////////////////////////// void SendToRecvFrom(void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes); void SendRecvPacket(void *xmit, void *recv, int xmit_to_rank, int recv_from_rank, int bytes); void SendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes); void SendToRecvFromComplete(std::vector &waitall); void StencilSendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes); void StencilSendToRecvFromComplete(std::vector &waitall); void StencilBarrier(void); //////////////////////////////////////////////////////////// // Barrier //////////////////////////////////////////////////////////// void Barrier(void); //////////////////////////////////////////////////////////// // Broadcast a buffer and composite larger //////////////////////////////////////////////////////////// void Broadcast(int root,void* data, int bytes); template void Broadcast(int root,obj &data) { Broadcast(root,(void *)&data,sizeof(data)); }; }; } #endif