/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/communicator/Communicator_base.h Copyright (C) 2015 Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_COMMUNICATOR_BASE_H #define GRID_COMMUNICATOR_BASE_H /////////////////////////////////// // Processor layout information /////////////////////////////////// #include NAMESPACE_BEGIN(Grid); class CartesianCommunicator : public SharedMemory { public: //////////////////////////////////////////// // Policies //////////////////////////////////////////// enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; static CommunicatorPolicy_t CommunicatorPolicy; static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } static int nCommThreads; //////////////////////////////////////////// // Communicator should know nothing of the physics grid, only processor grid. //////////////////////////////////////////// int _Nprocessors; // How many in all Coordinate _processors; // Which dimensions get relayed out over processors lanes. int _processor; // linear processor rank Coordinate _processor_coor; // linear processor coordinate unsigned long _ndimension; static Grid_MPI_Comm communicator_world; Grid_MPI_Comm communicator; std::vector communicator_halo; //////////////////////////////////////////////// // Must call in Grid startup //////////////////////////////////////////////// static void Init(int *argc, char ***argv); //////////////////////////////////////////////// // Constructors to sub-divide a parent communicator // and default to comm world //////////////////////////////////////////////// CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank); CartesianCommunicator(const Coordinate &pdimensions_in); virtual ~CartesianCommunicator(); private: //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private //////////////////////////////////////////////// void InitFromMPICommunicator(const Coordinate &processors, Grid_MPI_Comm communicator_base); public: //////////////////////////////////////////////////////////////////////////////////////// // Wraps MPI_Cart routines, or implements equivalent on other impls //////////////////////////////////////////////////////////////////////////////////////// void ShiftedRanks(int dim,int shift,int & source, int & dest); int RankFromProcessorCoor(Coordinate &coor); void ProcessorCoorFromRank(int rank,Coordinate &coor); int Dimensions(void) ; int IsBoss(void) ; int BossRank(void) ; int ThisRank(void) ; const Coordinate & ThisProcessorCoor(void) ; const Coordinate & ProcessorGrid(void) ; int ProcessorCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid //////////////////////////////////////////////////////////////////////////////// static int RankWorld(void) ; static void BroadcastWorld(int root,void* data, int bytes); //////////////////////////////////////////////////////////// // Reduction //////////////////////////////////////////////////////////// void GlobalSum(RealF &); void GlobalSumVector(RealF *,int N); void GlobalSum(RealD &); void GlobalSumVector(RealD *,int N); void GlobalSum(uint32_t &); void GlobalSum(uint64_t &); void GlobalSum(ComplexF &c); void GlobalSumVector(ComplexF *c,int N); void GlobalSum(ComplexD &c); void GlobalSumVector(ComplexD *c,int N); void GlobalXOR(uint32_t &); void GlobalXOR(uint64_t &); template void GlobalSum(obj &o){ typedef typename obj::scalar_type scalar_type; int words = sizeof(obj)/sizeof(scalar_type); scalar_type * ptr = (scalar_type *)& o; GlobalSumVector(ptr,words); } //////////////////////////////////////////////////////////// // Face exchange, buffer swap in translational invariant way //////////////////////////////////////////////////////////// void SendToRecvFrom(void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes); void SendRecvPacket(void *xmit, void *recv, int xmit_to_rank, int recv_from_rank, int bytes); void SendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes); void SendToRecvFromComplete(std::vector &waitall); double StencilSendToRecvFrom(void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes,int dir); double StencilSendToRecvFromBegin(std::vector &list, void *xmit, int xmit_to_rank, void *recv, int recv_from_rank, int bytes,int dir); void StencilSendToRecvFromComplete(std::vector &waitall,int i); void StencilBarrier(void); //////////////////////////////////////////////////////////// // Barrier //////////////////////////////////////////////////////////// void Barrier(void); //////////////////////////////////////////////////////////// // Broadcast a buffer and composite larger //////////////////////////////////////////////////////////// void Broadcast(int root,void* data, int bytes); //////////////////////////////////////////////////////////// // All2All down one dimension //////////////////////////////////////////////////////////// template void AllToAll(int dim,std::vector &in, std::vector &out){ assert(dim>=0); assert(dim<_ndimension); assert(in.size()==out.size()); int numnode = _processors[dim]; uint64_t bytes=sizeof(T); uint64_t words=in.size()/numnode; assert(numnode * words == in.size()); assert(words < (1ULL<<31)); AllToAll(dim,(void *)&in[0],(void *)&out[0],words,bytes); } void AllToAll(int dim ,void *in,void *out,uint64_t words,uint64_t bytes); void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes); template void Broadcast(int root,obj &data) { Broadcast(root,(void *)&data,sizeof(data)); } }; NAMESPACE_END(Grid); #endif