/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/communicator/SharedMemory.cc

    Copyright (C) 2015

Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */

#include <Grid/GridCore.h>
#include <pwd.h>

namespace Grid {

/*Construct from an MPI communicator*/
void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
{
  assert(_ShmSetup==0);
  WorldComm = comm;
  MPI_Comm_rank(WorldComm,&WorldRank);
  MPI_Comm_size(WorldComm,&WorldSize);
  // WorldComm, WorldSize, WorldRank

  /////////////////////////////////////////////////////////////////////
  // Split into groups that can share memory
  /////////////////////////////////////////////////////////////////////
  MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm);
  MPI_Comm_rank(WorldShmComm,&WorldShmRank);
  MPI_Comm_size(WorldShmComm,&WorldShmSize);
  // WorldShmComm, WorldShmSize, WorldShmRank

  // WorldNodes
  WorldNodes = WorldSize/WorldShmSize;
  assert( (WorldNodes * WorldShmSize) == WorldSize );

  // FIXME: Check all WorldShmSize are the same ?

  /////////////////////////////////////////////////////////////////////
  // find world ranks in our SHM group (i.e. which ranks are on our node)
  /////////////////////////////////////////////////////////////////////
  MPI_Group WorldGroup, ShmGroup;
  MPI_Comm_group (WorldComm, &WorldGroup);
  MPI_Comm_group (WorldShmComm, &ShmGroup);

  std::vector<int> world_ranks(WorldSize);
  for(int r=0;r<WorldSize;r++) world_ranks[r]=r;

  WorldShmRanks.resize(WorldSize);
  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup,&WorldShmRanks[0]);

  ///////////////////////////////////////////////////////////////////
  // Identify who is in my group and nominate the leader
  ///////////////////////////////////////////////////////////////////
  int g=0;
  std::vector<int> MyGroup;
  MyGroup.resize(WorldShmSize);
  for(int rank=0;rank<WorldSize;rank++){
    if(WorldShmRanks[rank]!=MPI_UNDEFINED){
      assert(g<WorldShmSize);
      MyGroup[g++] = rank;
    }
  }

  std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
  int myleader = MyGroup[0];

  std::vector<int> leaders_1hot(WorldSize,0);
  std::vector<int> leaders_group(WorldNodes,0);
  leaders_1hot [ myleader ] = 1;

  ///////////////////////////////////////////////////////////////////
  // global sum leaders over comm world
  ///////////////////////////////////////////////////////////////////
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm);
  assert(ierr==0);

  ///////////////////////////////////////////////////////////////////
  // find the group leaders world rank
  ///////////////////////////////////////////////////////////////////
  int group=0;
  for(int l=0;l<WorldSize;l++){
    if(leaders_1hot[l]){
      leaders_group[group++] = l;
    }
  }

  ///////////////////////////////////////////////////////////////////
  // Identify the node of the group in which I (and my leader) live
  ///////////////////////////////////////////////////////////////////
  WorldNode=-1;
  for(int g=0;g<WorldNodes;g++){
    if (myleader == leaders_group[g]){
      WorldNode=g;
    }
  }
  assert(WorldNode!=-1);
  _ShmSetup=1;
}

// Gray encode support
int BinaryToGray (int binary) {
  int gray = (binary>>1)^binary;
  return gray;
}

int Log2Size(int TwoToPower,int MAXLOG2)
{
  int log2size = -1;
  for(int i=0;i<=MAXLOG2;i++){
    if ( (0x1<<i) == TwoToPower ) {
      log2size = i;
      break;
    }
  }
  return log2size;
}
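/////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of the library): the node-discovery
// idiom used by GlobalSharedMemory::Init() above, reduced to a
// standalone MPI program. Everything here is standard MPI-3; only the
// trivial main() wrapper is our own invention. Build with mpicxx and
// run with several ranks per node to see the world/node rank split.
//
//   #include <mpi.h>
//   #include <cstdio>
//   int main(int argc,char **argv)
//   {
//     MPI_Init(&argc,&argv);
//     int wrank,wsize,srank,ssize;
//     MPI_Comm shm;
//     MPI_Comm_rank(MPI_COMM_WORLD,&wrank);
//     MPI_Comm_size(MPI_COMM_WORLD,&wsize);
//     // The same split Init() performs: ranks able to share memory
//     // (i.e. on the same node) land in the same "shm" communicator.
//     MPI_Comm_split_type(MPI_COMM_WORLD,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&shm);
//     MPI_Comm_rank(shm,&srank);
//     MPI_Comm_size(shm,&ssize);
//     printf("world rank %d/%d is node-local rank %d/%d\n",wrank,wsize,srank,ssize);
//     MPI_Comm_free(&shm);
//     MPI_Finalize();
//     return 0;
//   }
/////////////////////////////////////////////////////////////////////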
void GlobalSharedMemory::OptimalCommunicator(const std::vector<int> &processors,Grid_MPI_Comm & optimal_comm)
{
#undef HYPERCUBE
#ifdef HYPERCUBE
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
  assert(log2size != -1);

  ////////////////////////////////////////////////////////////////
  // Identify the hypercube coordinate of this node using hostname
  ////////////////////////////////////////////////////////////////
  // n runs 0...7 9...16 18...25 27...34     (8*4)  5 bits
  // i runs 0..7                                    3 bits
  // r runs 0..3                                    2 bits
  // 2^10 = 1024 nodes
  const int maxhdim = 10;
  std::vector<int> HyperCubeCoords(maxhdim,0);
  std::vector<int> RootHyperCubeCoords(maxhdim,0);
  int R;
  int I;
  int N;
  const int namelen = _POSIX_HOST_NAME_MAX;
  char name[namelen];

  // Parse ICE-XA hostname to get hypercube location
  gethostname(name,namelen);
  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N);
  assert(nscan==3);

  int nlo = N%9;
  int nhi = N/9;
  uint32_t hypercoor = (R<<8)|(I<<5)|(nhi<<3)|nlo;
  uint32_t rootcoor  = hypercoor;

  //////////////////////////////////////////////////////////////////
  // Print debug info
  //////////////////////////////////////////////////////////////////
  for(int d=0;d<maxhdim;d++){
    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
  }

  std::string hname(name);
  std::cout << "hostname "<<hname<<std::endl;
  std::cout << "R " << R << " I " << I << " N " << N
            << " hypercoor 0x"<<std::hex<<hypercoor<<std::dec<<std::endl;

  //////////////////////////////////////////////////////////////////
  // broadcast node 0's base coordinate for this partition
  //////////////////////////////////////////////////////////////////
  MPI_Bcast(&rootcoor, sizeof(rootcoor), MPI_BYTE, 0, WorldComm);
  hypercoor = hypercoor - rootcoor;
  assert(hypercoor<WorldSize);
  assert(hypercoor>=0);

  //////////////////////////////////////
  // Printing
  //////////////////////////////////////
  for(int d=0;d<maxhdim;d++){
    HyperCubeCoords[d] = (hypercoor>>d)&0x1;
  }

  ////////////////////////////////////////////////////////////////
  // Identify subblock of ranks on node spreading across dims
  // in a maximally symmetrical way
  ////////////////////////////////////////////////////////////////
  int ndimension = processors.size();
  std::vector<int> processor_coor(ndimension);
  std::vector<int> WorldDims = processors;
  std::vector<int> ShmDims  (ndimension,1);
  std::vector<int> NodeDims (ndimension);
  std::vector<int> ShmCoor  (ndimension);
  std::vector<int> NodeCoor (ndimension);
  std::vector<int> WorldCoor(ndimension);
  std::vector<int> HyperCoor(ndimension);
  int dim = 0;
  for(int l2=0;l2<log2size;l2++){
    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
    ShmDims[dim]*=2;
    dim=(dim+1)%ndimension;
  }

  ////////////////////////////////////////////////////////////////
  // Establish torus of processes and nodes with sub-blockings
  ////////////////////////////////////////////////////////////////
  for(int d=0;d<ndimension;d++){
    NodeDims[d] = WorldDims[d]/ShmDims[d];
  }

  ////////////////////////////////////////////////////////////////
  // Map Hcube according to physical lattice: loop over dims and
  // peel off the coordinate bits belonging to each dimension.
  ////////////////////////////////////////////////////////////////
  int hcoor = hypercoor;
  for(int d=0;d<ndimension;d++){
    int bits = Log2Size(NodeDims[d],MAXLOG2RANKSPERNODE);
    int msk  = (0x1<<bits)-1;
    HyperCoor[d] = hcoor & msk;
    HyperCoor[d] = BinaryToGray(HyperCoor[d]); // space filling curve magic
    hcoor = hcoor >> bits;
  }

  ////////////////////////////////////////////////////////////////
  // Check processor counts match
  ////////////////////////////////////////////////////////////////
  int Nprocessors=1;
  for(int i=0;i<ndimension;i++){
    Nprocessors*=processors[i];
  }
  assert(WorldSize==Nprocessors);

  ////////////////////////////////////////////////////////////////
  // Establish mapping between lexico physics coord and WorldRank
  ////////////////////////////////////////////////////////////////
  int rank;
  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
  for(int d=0;d<ndimension;d++) NodeCoor[d]=HyperCoor[d]; // use the hypercube node coordinate
  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);

  /////////////////////////////////////////////////////////////////
  // Build the new communicator
  /////////////////////////////////////////////////////////////////
  int ierr = MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
  assert(ierr==0);
#else
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
  int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE);
  assert(log2size != -1);

  ////////////////////////////////////////////////////////////////
  // Identify subblock of ranks on node spreading across dims
  // in a maximally symmetrical way
  ////////////////////////////////////////////////////////////////
  int ndimension = processors.size();
  std::vector<int> processor_coor(ndimension);
  std::vector<int> WorldDims = processors;
  std::vector<int> ShmDims  (ndimension,1);
  std::vector<int> NodeDims (ndimension);
  std::vector<int> ShmCoor  (ndimension);
  std::vector<int> NodeCoor (ndimension);
  std::vector<int> WorldCoor(ndimension);
  int dim = 0;
  for(int l2=0;l2<log2size;l2++){
    while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension;
    ShmDims[dim]*=2;
    dim=(dim+1)%ndimension;
  }

  ////////////////////////////////////////////////////////////////
  // Establish torus of processes and nodes with sub-blockings
  ////////////////////////////////////////////////////////////////
  for(int d=0;d<ndimension;d++){
    NodeDims[d] = WorldDims[d]/ShmDims[d];
  }

  ////////////////////////////////////////////////////////////////
  // Check processor counts match
  ////////////////////////////////////////////////////////////////
  int Nprocessors=1;
  for(int i=0;i<ndimension;i++){
    Nprocessors*=processors[i];
  }
  assert(WorldSize==Nprocessors);

  ////////////////////////////////////////////////////////////////
  // Establish mapping between lexico physics coord and WorldRank
  ////////////////////////////////////////////////////////////////
  int rank;
  Lexicographic::CoorFromIndexReversed(NodeCoor,WorldNode   ,NodeDims);
  Lexicographic::CoorFromIndexReversed(ShmCoor ,WorldShmRank,ShmDims);
  for(int d=0;d<ndimension;d++) WorldCoor[d] = NodeCoor[d]*ShmDims[d]+ShmCoor[d];
  Lexicographic::IndexFromCoorReversed(WorldCoor,rank,WorldDims);

  /////////////////////////////////////////////////////////////////
  // Build the new communicator
  /////////////////////////////////////////////////////////////////
  int ierr = MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
  assert(ierr==0);
#endif
}

////////////////////////////////////////////////////////////////////////////////////////////
// SHMGET: System V shared memory segments
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMGET
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
  assert(_ShmSetup==1);
  assert(_ShmAlloc==0);

  //////////////////////////////////////////////////////////////////////
  // allocate the shared windows for our group
  //////////////////////////////////////////////////////////////////////
  MPI_Barrier(WorldShmComm);
  WorldShmCommBufs.resize(WorldShmSize);
  std::vector<int> shmids(WorldShmSize);

  if ( WorldShmRank == 0 ) {
    for(int r=0;r<WorldShmSize;r++){
      size_t size = bytes;
      key_t key   = IPC_PRIVATE;
      int shmflags = IPC_CREAT | SHM_R | SHM_W;
#ifdef SHM_HUGETLB
      if (flags) shmflags |= SHM_HUGETLB;
#endif
      if ( (shmids[r] = shmget(key,size,shmflags)) == -1 ) {
        perror("shmget");
        exit(1);
      }
    }
  }
  // Everyone on the node learns the segment ids and attaches
  MPI_Barrier(WorldShmComm);
  MPI_Bcast(&shmids[0],WorldShmSize*sizeof(int),MPI_BYTE,0,WorldShmComm);
  MPI_Barrier(WorldShmComm);

  for(int r=0;r<WorldShmSize;r++){
    WorldShmCommBufs[r] = (void *)shmat(shmids[r],NULL,0);
    if ( WorldShmCommBufs[r] == (void *)-1 ) {
      perror("Shared memory attach failure");
      shmctl(shmids[r],IPC_RMID,NULL);
      exit(2);
    }
  }
  MPI_Barrier(WorldShmComm);

  // Mark segments for deletion once every rank has detached
  for(int r=0;r<WorldShmSize;r++){
    shmctl(shmids[r],IPC_RMID,NULL);
  }
  MPI_Barrier(WorldShmComm);

  _ShmAlloc=1;
  _ShmAllocBytes = bytes;
}
#endif

////////////////////////////////////////////////////////////////////////////////////////////
// POSIX SHMOPEN: tmpfs backed posix shared memory virtual file system
////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_MPI3_SHMOPEN
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
  assert(_ShmSetup==1);
  assert(_ShmAlloc==0);
  MPI_Barrier(WorldShmComm);
  WorldShmCommBufs.resize(WorldShmSize);

  char shm_name [NAME_MAX];
  if ( WorldShmRank == 0 ) {
    for(int r=0;r<WorldShmSize;r++){

      size_t size = bytes;

      struct passwd *pw = getpwuid(getuid());
      sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);

      shm_unlink(shm_name);
      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
      if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
      ftruncate(fd, size);

      int mmap_flag = MAP_SHARED;
#ifdef MAP_POPULATE
      mmap_flag |= MAP_POPULATE;
#endif
#ifdef MAP_HUGETLB
      if (flags) mmap_flag |= MAP_HUGETLB;
#endif
      void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, mmap_flag, fd, 0);

      std::cout << "Set WorldShmCommBufs["<<r<<"]="<<ptr<<" ("<<size<<" bytes)"<<std::endl;
      if ( ptr == (void *)MAP_FAILED ) { perror("failed mmap"); assert(0); }
      assert(((uint64_t)ptr&0x3F)==0);

      WorldShmCommBufs[r] = ptr;
      close(fd);
    }
  }

  MPI_Barrier(WorldShmComm);

  if ( WorldShmRank != 0 ) {
    for(int r=0;r<WorldShmSize;r++){

      size_t size = bytes;

      struct passwd *pw = getpwuid(getuid());
      sprintf(shm_name,"/Grid_%s_mpi3_shm_%d_%d",pw->pw_name,WorldNode,r);

      int fd=shm_open(shm_name,O_RDWR,0666);
      if ( fd<0 ) { perror("failed shm_open"); assert(0); }

      void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
      assert(((uint64_t)ptr&0x3F)==0);
      WorldShmCommBufs[r] = ptr;

      close(fd);
    }
  }
  _ShmAlloc=1;
  _ShmAllocBytes = bytes;
}
#endif
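/////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of the library): the POSIX shm_open /
// mmap pattern used by SharedMemoryAllocate() above, as a minimal
// standalone program. The name "/Grid_demo_shm" and the 4 KB size are
// invented for illustration; on Linux, link with -lrt.
//
//   #include <sys/mman.h>
//   #include <fcntl.h>
//   #include <unistd.h>
//   #include <cassert>
//   int main(void)
//   {
//     const char *name = "/Grid_demo_shm";
//     size_t size = 4096;
//     shm_unlink(name);                               // remove any stale object
//     int fd = shm_open(name,O_RDWR|O_CREAT,0666);    // create shared object
//     assert(fd>=0);
//     assert(ftruncate(fd,size)==0);                  // set its length
//     void *ptr = mmap(NULL,size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);
//     assert(ptr!=MAP_FAILED);
//     // ... a second process opening the same name sees the same pages ...
//     munmap(ptr,size);
//     close(fd);
//     shm_unlink(name);                               // clean up
//     return 0;
//   }
/////////////////////////////////////////////////////////////////////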
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
////////////////////////////////////////////////////////
void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
{
  int rank, size;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  ShmRanks.resize(size);

  /////////////////////////////////////////////////////////////////////
  // Split into groups that can share memory
  /////////////////////////////////////////////////////////////////////
  MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
  MPI_Comm_rank(ShmComm,&ShmRank);
  MPI_Comm_size(ShmComm,&ShmSize);
  ShmCommBufs.resize(ShmSize);

  //////////////////////////////////////////////////////////////////////
  // Map ShmRank to WorldShmRank and use the right buffer
  //////////////////////////////////////////////////////////////////////
  assert (GlobalSharedMemory::ShmAlloc()==1);
  heap_size = GlobalSharedMemory::ShmAllocBytes();
  for(int r=0;r<ShmSize;r++){
    uint32_t wsr = (r==ShmRank) ? GlobalSharedMemory::WorldShmRank : 0;
    MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
    ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
  }
  ShmBufferFreeAll();

  /////////////////////////////////////////////////////////////////////
  // find comm ranks in our SHM group (i.e. which ranks are on our node)
  /////////////////////////////////////////////////////////////////////
  MPI_Group FullGroup, ShmGroup;
  MPI_Comm_group (comm   , &FullGroup);
  MPI_Comm_group (ShmComm, &ShmGroup);

  std::vector<int> ranks(size);
  for(int r=0;r<size;r++) ranks[r]=r;
  MPI_Group_translate_ranks (FullGroup,size,&ranks[0],ShmGroup,&ShmRanks[0]);

  _ShmSetup=1;
}

} // namespace Grid
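/////////////////////////////////////////////////////////////////////
// Illustrative call sequence (a sketch, not library code): how the
// routines in this file fit together. The real call sites live
// elsewhere in Grid; the layout and byte count below are invented.
//
//   MPI_Init(&argc,&argv);
//   GlobalSharedMemory::Init(MPI_COMM_WORLD);       // discover nodes and leaders
//
//   std::vector<int> mpi_layout = {2,2,2,2};        // hypothetical 16-rank grid
//   Grid_MPI_Comm optimal;
//   GlobalSharedMemory::OptimalCommunicator(mpi_layout,optimal);
//
//   GlobalSharedMemory::SharedMemoryAllocate(1024*1024*1024,0); // 1 GB, no huge pages
//
//   // Each communicator built on "optimal" then binds its node-local
//   // neighbours to the shared windows via SharedMemory::SetCommunicator().
/////////////////////////////////////////////////////////////////////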