/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/communicator/Communicator_none.cc

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/GridCore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/mman.h>

namespace Grid {

///////////////////////////////////////////////////////////////
// Info that is setup once and independent of cartesian layout
///////////////////////////////////////////////////////////////
void *   CartesianCommunicator::ShmCommBuf;
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; // 1GB shared heap default
CartesianCommunicator::CommunicatorPolicy_t
CartesianCommunicator::CommunicatorPolicy = CartesianCommunicator::CommunicatorPolicyConcurrent;
int CartesianCommunicator::nCommThreads = -1;
int CartesianCommunicator::Hugepages    = 0;

/////////////////////////////////
// Alloc, free shmem region
/////////////////////////////////
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
  // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
  void *ptr = (void *)heap_top;
  heap_top  += bytes;
  heap_bytes+= bytes;
  if (heap_bytes >= MAX_MPI_SHM_BYTES) {
    std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
    assert(heap_bytes<MAX_MPI_SHM_BYTES);
  }
  return ptr;
}
void CartesianCommunicator::ShmBufferFreeAll(void) {
  heap_top  =(size_t)ShmBufferSelf();
  heap_bytes=0;
}

/////////////////////////////////
// Grid information queries
/////////////////////////////////
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };

////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid
////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::GlobalSum(ComplexF &c)
{
  GlobalSumVector((float *)&c,2);
}
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
{
  GlobalSumVector((float *)c,2*N);
}
void CartesianCommunicator::GlobalSum(ComplexD &c)
{
  GlobalSumVector((double *)&c,2);
}
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
{
  GlobalSumVector((double *)c,2*N);
}

#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3)
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{
  std::vector<int> row(_ndimension,1);
  assert(dim>=0 && dim<_ndimension);

  // Split the communicator so that only the ranks along "dim" participate
  row[dim] = _processors[dim];

  int me;
  CartesianCommunicator Comm(row,*this,me);
  Comm.AllToAll(in,out,words,bytes);
}
void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes)
{
  // MPI is a pain and uses "int" count arguments.
  // 64*64*64*128*16 == 500 million elements of data; multiplied by 24*4 bytes
  // that is 50x10^9 >> 2x10^9, so a raw byte count overflows a 32-bit int
  // (a Y2K-style bug; turns up on 32^3 x 64 Gparity too).
  MPI_Datatype object;
  int iwords;
  int ibytes;
  iwords = words;
  ibytes = bytes;
  assert(words == iwords); // safe to cast to int ?
  assert(bytes == ibytes); // safe to cast to int ?
  MPI_Type_contiguous(ibytes,MPI_BYTE,&object);
  MPI_Type_commit(&object);
  MPI_Alltoall(in,iwords,object,out,iwords,object,communicator);
  MPI_Type_free(&object);
}
#endif
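// A minimal standalone sketch (not part of the library) of the derived-datatype
// trick used in AllToAll above: MPI counts are 32-bit ints, so a large transfer
// is expressed as "words" elements of a contiguous "bytes"-sized type, and only
// the element count must fit in an int. The name big_alltoall_example is
// hypothetical; assumes MPI_Init has already been called.
#if 0
#include <mpi.h>
#include <vector>
#include <cstdint>
static void big_alltoall_example(MPI_Comm comm,uint64_t words,uint64_t bytes)
{
  int nrank; MPI_Comm_size(comm,&nrank);
  std::vector<char> in ((size_t)nrank*words*bytes);
  std::vector<char> out((size_t)nrank*words*bytes);

  MPI_Datatype object;
  MPI_Type_contiguous((int)bytes,MPI_BYTE,&object); // one element = "bytes" bytes
  MPI_Type_commit(&object);
  // The total payload nrank*words*bytes may exceed 2^31 even though the
  // per-call count (words) and the element size (bytes) each fit in an int.
  MPI_Alltoall(in.data(),(int)words,object,out.data(),(int)words,object,comm);
  MPI_Type_free(&object);
}
#endif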
#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT)
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
  _ndimension = processors.size();

  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
  std::vector<int> parent_processor_coor(_ndimension,0);
  std::vector<int> parent_processors    (_ndimension,1);

  // Can make 5d grid from 4d etc...; pad the leading dimensions with unit extent
  int pad = _ndimension-parent_ndimension;
  for(int d=0;d<parent_ndimension;d++){
    parent_processor_coor[pad+d]=parent._processor_coor[d];
    parent_processors    [pad+d]=parent._processors[d];
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // Split the communicator: count how many child grids tile the parent grid
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  int Nparent;
  MPI_Comm_size(parent.communicator,&Nparent);

  int childsize=1;
  for(int d=0;d<processors.size();d++) childsize *= processors[d];
  int Nchild = Nparent/childsize;
  assert (childsize * Nchild == Nparent);

  std::vector<int> ccoor(_ndimension); // coor within subcommunicator
  std::vector<int> scoor(_ndimension); // coor of split within parent
  std::vector<int> ssize(_ndimension); // number of splits along each parent dimension

  for(int d=0;d<_ndimension;d++){
    ccoor[d] = parent_processor_coor[d] % processors[d];
    scoor[d] = parent_processor_coor[d] / processors[d];
    ssize[d] = parent_processors[d]     / processors[d];
  }

  int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
  // MPI uses the reverse lexicographic convention to us (last coordinate runs fastest)
  Lexicographic::IndexFromCoorReversed(ccoor,crank,processors);
  Lexicographic::IndexFromCoorReversed(scoor,srank,ssize);

  MPI_Comm comm_split;
  if ( Nchild > 1 ) {
    if(0){
      std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
    }
    ////////////////////////////////////////////////////////////////
    // Split the communicator
    ////////////////////////////////////////////////////////////////
    int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
    assert(ierr==0);
  } else {
    comm_split = parent.communicator;
    srank = 0;
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////
  // Set up from the new split communicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  InitFromMPICommunicator(processors,comm_split);
}

//////////////////////////////////////////////////////////////////////////////////////////////////////
// Take an MPI communicator and lay a Cartesian process grid over it
//////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base)
{
  _ndimension = processors.size();
  _processor_coor.resize(_ndimension);

  /////////////////////////////////
  // Count the requested nodes
  /////////////////////////////////
  _Nprocessors=1;
  _processors = processors;
  for(int i=0;i<_ndimension;i++){
    _Nprocessors*=_processors[i];
  }

  std::vector<int> periodic(_ndimension,1);
  MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator);
  MPI_Comm_rank(communicator,&_processor);
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);

  if ( 0 && (communicator_base != communicator_world) ) {
    std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
  }
}

CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  InitFromMPICommunicator(processors,communicator_world);
}
#endif
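// A small worked sketch (not part of the library) of the coordinate arithmetic
// in the split constructor above. Splitting a 4x2 parent grid into 2x2 child
// grids, the rank at parent coordinate (3,1) sits at ccoor=(1,1) inside its
// child grid, and that child grid sits at scoor=(1,0) among the ssize=(2,1)
// child grids; IndexFromCoorReversed then linearises these with the last
// coordinate running fastest, matching MPI's rank ordering. The function name
// split_coordinate_example is hypothetical.
#if 0
#include <vector>
#include <cassert>
static void split_coordinate_example(void)
{
  std::vector<int> parent_coor = {3,1}; // this rank's coordinate in the 4x2 parent
  std::vector<int> parent_grid = {4,2};
  std::vector<int> child_grid  = {2,2}; // requested child grid dimensions
  std::vector<int> ccoor(2),scoor(2),ssize(2);
  for(int d=0;d<2;d++){
    ccoor[d] = parent_coor[d] % child_grid[d]; // coordinate inside the child grid
    scoor[d] = parent_coor[d] / child_grid[d]; // which child grid along this axis
    ssize[d] = parent_grid[d] / child_grid[d]; // number of child grids along this axis
  }
  assert(ccoor[0]==1 && ccoor[1]==1);
  assert(scoor[0]==1 && scoor[1]==0);
  assert(ssize[0]==2 && ssize[1]==1);
}
#endif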
#if !defined( GRID_COMMS_MPI3)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount(); };
int CartesianCommunicator::RankCount(void) { return ProcessorCount(); };
#endif

#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT)
double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
                                                     int xmit_to_rank,
                                                     void *recv,
                                                     int recv_from_rank,
                                                     int bytes, int dir)
{
  std::vector<CommsRequest_t> list;
  // Discard the "dir"
  SendToRecvFromBegin   (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
  SendToRecvFromComplete(list);
  return 2.0*bytes;
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                         void *xmit,
                                                         int xmit_to_rank,
                                                         void *recv,
                                                         int recv_from_rank,
                                                         int bytes, int dir)
{
  // Discard the "dir"
  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
  return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{
  SendToRecvFromComplete(waitall);
}
#endif

#if !defined( GRID_COMMS_MPI3)
void CartesianCommunicator::StencilBarrier(void){};

commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;

void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
void *CartesianCommunicator::ShmBuffer(int rank) { return NULL; }
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { return NULL; }

void CartesianCommunicator::ShmInitGeneric(void){
#if 1
  int mmap_flag =0;
#ifdef MAP_ANONYMOUS
  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS;
#endif
#ifdef MAP_ANON
  mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON;
#endif
#ifdef MAP_HUGETLB
  if ( Hugepages ) mmap_flag |= MAP_HUGETLB;
#endif
  ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0);
  if (ShmCommBuf == (void *)MAP_FAILED) {
    perror("mmap failed ");
    exit(EXIT_FAILURE);
  }
#ifdef MADV_HUGEPAGE
  if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE);
#endif
#else
  ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
  ShmCommBuf=(void *)&ShmBufStorageVector[0];
#endif
  bzero(ShmCommBuf,MAX_MPI_SHM_BYTES);
}
#endif

}
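// A minimal standalone sketch (not part of the library) of the allocation
// pattern ShmInitGeneric uses above: an anonymous mmap (portable across the
// MAP_ANONYMOUS/MAP_ANON spellings), optional explicit huge pages via
// MAP_HUGETLB, and a transparent-huge-page hint via madvise otherwise.
// The function name alloc_comm_heap_example is hypothetical.
#if 0
#include <sys/mman.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
static void *alloc_comm_heap_example(size_t bytes,int want_hugepages)
{
  int flags = MAP_SHARED;
#ifdef MAP_ANONYMOUS
  flags |= MAP_ANONYMOUS;                       // Linux spelling
#else
  flags |= MAP_ANON;                            // BSD/macOS spelling
#endif
#ifdef MAP_HUGETLB
  if ( want_hugepages ) flags |= MAP_HUGETLB;   // explicit huge pages (Linux only)
#endif
  void *buf = mmap(NULL,bytes,PROT_READ|PROT_WRITE,flags,-1,0);
  if ( buf == MAP_FAILED ) { perror("mmap"); exit(EXIT_FAILURE); }
#ifdef MADV_HUGEPAGE
  // Fall back to hinting transparent huge pages when MAP_HUGETLB was not used
  if ( !want_hugepages ) madvise(buf,bytes,MADV_HUGEPAGE);
#endif
  memset(buf,0,bytes);                          // touch and zero the region
  return buf;
}
#endif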