diff --git a/TODO b/TODO
index 83bfda5e..746302ca 100644
--- a/TODO
+++ b/TODO
@@ -1,20 +1,36 @@
 TODO:
 ---------------
-Large item work list:
+Code item work list
+
+a) namespaces & indentation
+   GRID_BEGIN_NAMESPACE();
+   GRID_END_NAMESPACE();
+-- delete QCD namespace
+
+b) GPU branch
+- start branch
+- Increase Macro use in core library support; prepare for change
+- Audit volume of "device" code
+- Virtual function audit
+- Start port once Nvidia box is up
+- Cut down volume of code for first port? How?
+
+Physics item work list:
 1)- BG/Q port and check ; Andrew says ok.
-2)- Christoph's local basis expansion Lanczos
---
-3a)- RNG I/O in ILDG/SciDAC (minor)
-3b)- Precision conversion and sort out localConvert <-- partial/easy
-3c)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
-4)- Physical propagator interface
-5)- Conserved currents
-6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
-7)- HDCR resume
+2)- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
+3)- Physical propagator interface
+4)- Multigrid Wilson and DWF, compare to other Multigrid implementations
+5)- HDCR resume
+---------------------------- Recent DONE
+-- RNG I/O in ILDG/SciDAC (minor)
+-- Precision conversion and sort out localConvert <-- partial/easy
+-- Conserved currents (Andrew)
+-- Split grid
+-- Christoph's local basis expansion Lanczos
 -- MultiRHS with spread out extra dim
 -- Go through filesystem with SciDAC I/O ; <-- DONE ; bmark cori
 -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
 -- GaugeFix into central location <-- DONE
diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc
index a270e3fa..29ccf96c 100644
--- a/benchmarks/Benchmark_comms.cc
+++ b/benchmarks/Benchmark_comms.cc
@@ -106,7 +106,7 @@ int main (int argc, char ** argv)
 for(int i=0;i requests;
+ std::vector requests;
 ncomm=0;
 for(int mu=0;mu<4;mu++){
@@ -202,7 +202,7 @@ int main (int argc, char ** argv)
 int recv_from_rank;
 {
- std::vector requests;
+ std::vector requests;
 Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 Grid.SendToRecvFromBegin(requests,
 (void *)&xbuf[mu][0],
@@ -215,7 +215,7 @@ int main (int argc, char ** argv)
 comm_proc = mpi_layout[mu]-1;
 {
- std::vector requests;
+ std::vector requests;
 Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 Grid.SendToRecvFromBegin(requests,
 (void *)&xbuf[mu+4][0],
@@ -290,7 +290,7 @@ int main (int argc, char ** argv)
 dbytes=0;
 ncomm=0;
- std::vector requests;
+ std::vector requests;
 for(int mu=0;mu<4;mu++){
@@ -383,7 +383,7 @@ int main (int argc, char ** argv)
 for(int i=0;i requests;
+ std::vector requests;
 dbytes=0;
 ncomm=0;
 for(int mu=0;mu<4;mu++){
@@ -481,7 +481,7 @@ int main (int argc, char ** argv)
 for(int i=0;i requests;
+ std::vector requests;
 dbytes=0;
 ncomm=0;
diff --git a/configure.ac b/configure.ac
index 496f7fd7..468d9d5f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -337,15 +337,11 @@ case ${ac_PRECISION} in
 esac
 ###################### Shared memory allocation technique under MPI3
-AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
+AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|hugetlbfs],
 [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
 case ${ac_SHM} in
- shmget)
- AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
- ;;
-
 shmopen)
 AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
 ;;
@@ -367,7 +363,7 @@
AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path], AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing]) ############### communication type selection -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], +AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto], [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in @@ -375,22 +371,10 @@ case ${ac_COMMS} in AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) comms_type='none' ;; - mpi3*) + mpi*) AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) comms_type='mpi3' ;; - mpit) - AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] ) - comms_type='mpit' - ;; - mpi*) - AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) - comms_type='mpi' - ;; - shmem) - AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) - comms_type='shmem' - ;; *) AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); ;; diff --git a/lib/Makefile.am b/lib/Makefile.am index 6dd7899e..dc33e7cf 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,28 +1,18 @@ extra_sources= extra_headers= -if BUILD_COMMS_MPI - extra_sources+=communicator/Communicator_mpi.cc - extra_sources+=communicator/Communicator_base.cc -endif if BUILD_COMMS_MPI3 extra_sources+=communicator/Communicator_mpi3.cc extra_sources+=communicator/Communicator_base.cc -endif - -if BUILD_COMMS_MPIT - extra_sources+=communicator/Communicator_mpit.cc - extra_sources+=communicator/Communicator_base.cc -endif - -if BUILD_COMMS_SHMEM - extra_sources+=communicator/Communicator_shmem.cc - extra_sources+=communicator/Communicator_base.cc + extra_sources+=communicator/SharedMemoryMPI.cc + extra_sources+=communicator/SharedMemory.cc endif if BUILD_COMMS_NONE extra_sources+=communicator/Communicator_none.cc extra_sources+=communicator/Communicator_base.cc + extra_sources+=communicator/SharedMemoryNone.cc + extra_sources+=communicator/SharedMemory.cc endif if BUILD_HDF5 diff --git a/lib/communicator/Communicator.h b/lib/communicator/Communicator.h index 09ce50dc..d4ec5a13 100644 --- a/lib/communicator/Communicator.h +++ b/lib/communicator/Communicator.h @@ -28,6 +28,7 @@ Author: Peter Boyle #ifndef GRID_COMMUNICATOR_H #define GRID_COMMUNICATOR_H +#include #include #endif diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 3e561405..edbf26af 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -36,33 +36,9 @@ namespace Grid { /////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout /////////////////////////////////////////////////////////////// -void * CartesianCommunicator::ShmCommBuf; -uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent; int CartesianCommunicator::nCommThreads = -1; -int CartesianCommunicator::Hugepages = 0; - -///////////////////////////////// -// Alloc, free shmem region -///////////////////////////////// -void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){ - // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes - void *ptr = (void *)heap_top; - heap_top += bytes; - heap_bytes+= bytes; - if (heap_bytes >= MAX_MPI_SHM_BYTES) { - std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm 
flag" < row(_ndimension,1); - assert(dim>=0 && dim<_ndimension); - - // Split the communicator - row[dim] = _processors[dim]; - - int me; - CartesianCommunicator Comm(row,*this,me); - Comm.AllToAll(in,out,words,bytes); -} -void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) -{ - // MPI is a pain and uses "int" arguments - // 64*64*64*128*16 == 500Million elements of data. - // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. - // (Turns up on 32^3 x 64 Gparity too) - MPI_Datatype object; - int iwords; - int ibytes; - iwords = words; - ibytes = bytes; - assert(words == iwords); // safe to cast to int ? - assert(bytes == ibytes); // safe to cast to int ? - MPI_Type_contiguous(ibytes,MPI_BYTE,&object); - MPI_Type_commit(&object); - MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); - MPI_Type_free(&object); -} -#endif - -#if defined( GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) -{ - _ndimension = processors.size(); - - int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); - std::vector parent_processor_coor(_ndimension,0); - std::vector parent_processors (_ndimension,1); - - // Can make 5d grid from 4d etc... - int pad = _ndimension-parent_ndimension; - for(int d=0;d ccoor(_ndimension); // coor within subcommunicator - std::vector scoor(_ndimension); // coor of split within parent - std::vector ssize(_ndimension); // coor of split within parent - - for(int d=0;d<_ndimension;d++){ - ccoor[d] = parent_processor_coor[d] % processors[d]; - scoor[d] = parent_processor_coor[d] / processors[d]; - ssize[d] = parent_processors[d] / processors[d]; - } - int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms - // Mpi uses the reverse Lexico convention to us - Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); - Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); - - MPI_Comm comm_split; - if ( Nchild > 1 ) { - - if(0){ - std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, MPI_Comm communicator_base) -{ - _ndimension = processors.size(); - _processor_coor.resize(_ndimension); - - ///////////////////////////////// - // Count the requested nodes - ///////////////////////////////// - _Nprocessors=1; - _processors = processors; - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - std::vector periodic(_ndimension,1); - MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); - MPI_Comm_rank(communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - - if ( 0 && (communicator_base != communicator_world) ) { - std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< &processors) -{ - InitFromMPICommunicator(processors,communicator_world); -} - -#endif - -#if !defined( GRID_COMMS_MPI3) -int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; -int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; -#endif - -#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPIT) -double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes, int dir) -{ - std::vector list; - // Discard the "dir" - SendToRecvFromBegin 
(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); - SendToRecvFromComplete(list); - return 2.0*bytes; -} -double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes, int dir) -{ - // Discard the "dir" - SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); - return 2.0*bytes; -} -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) -{ - SendToRecvFromComplete(waitall); -} -#endif - -#if !defined( GRID_COMMS_MPI3) - -void CartesianCommunicator::StencilBarrier(void){}; - -commVector CartesianCommunicator::ShmBufStorageVector; - -void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } - -void *CartesianCommunicator::ShmBuffer(int rank) { - return NULL; -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { - return NULL; -} -void CartesianCommunicator::ShmInitGeneric(void){ -#if 1 - int mmap_flag =0; -#ifdef MAP_ANONYMOUS - mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; -#endif -#ifdef MAP_ANON - mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; -#endif -#ifdef MAP_HUGETLB - if ( Hugepages ) mmap_flag |= MAP_HUGETLB; -#endif - ShmCommBuf =(void *) mmap(NULL, MAX_MPI_SHM_BYTES, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); - if (ShmCommBuf == (void *)MAP_FAILED) { - perror("mmap failed "); - exit(EXIT_FAILURE); - } -#ifdef MADV_HUGEPAGE - if (!Hugepages ) madvise(ShmCommBuf,MAX_MPI_SHM_BYTES,MADV_HUGEPAGE); -#endif -#else - ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); - ShmCommBuf=(void *)&ShmBufStorageVector[0]; -#endif - bzero(ShmCommBuf,MAX_MPI_SHM_BYTES); -} - -#endif } diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index c9dccd11..7d6911d3 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -32,117 +32,33 @@ Author: Peter Boyle /////////////////////////////////// // Processor layout information /////////////////////////////////// -#ifdef GRID_COMMS_MPI -#include -#endif -#ifdef GRID_COMMS_MPI3 -#include -#endif -#ifdef GRID_COMMS_MPIT -#include -#endif -#ifdef GRID_COMMS_SHMEM -#include -#endif +#include namespace Grid { -class CartesianCommunicator { - public: +class CartesianCommunicator : public SharedMemory { +public: //////////////////////////////////////////// - // Isend/Irecv/Wait, or Sendrecv blocking + // Policies //////////////////////////////////////////// enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential }; static CommunicatorPolicy_t CommunicatorPolicy; static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; } - - /////////////////////////////////////////// - // Up to 65536 ranks per node adequate for now - // 128MB shared memory for comms enought for 48^4 local vol comms - // Give external control (command line override?) of this - /////////////////////////////////////////// - static const int MAXLOG2RANKSPERNODE = 16; - static uint64_t MAX_MPI_SHM_BYTES; static int nCommThreads; - // use explicit huge pages - static int Hugepages; + //////////////////////////////////////////// // Communicator should know nothing of the physics grid, only processor grid. + //////////////////////////////////////////// int _Nprocessors; // How many in all std::vector _processors; // Which dimensions get relayed out over processors lanes. 
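  // Illustrative example (assumed numbers, for orientation only): a request of
  // processors = {2,2,2,4} gives _Nprocessors = 2*2*2*4 = 32; each rank then
  // holds its linear rank in _processor and its 4-d Cartesian coordinate,
  // e.g. {1,0,1,3}, in _processor_coor.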
int _processor; // linear processor rank std::vector _processor_coor; // linear processor coordinate - unsigned long _ndimension; - -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPIT) - static MPI_Comm communicator_world; - - MPI_Comm communicator; - std::vector communicator_halo; - - typedef MPI_Request CommsRequest_t; - -#else - typedef int CommsRequest_t; -#endif - - - //////////////////////////////////////////////////////////////////// - // Helper functionality for SHM Windows common to all other impls - //////////////////////////////////////////////////////////////////// - // Longer term; drop this in favour of a master / slave model with - // cartesian communicator on a subset of ranks, slave ranks controlled - // by group leader with data xfer via shared memory - //////////////////////////////////////////////////////////////////// -#ifdef GRID_COMMS_MPI3 - - static int ShmRank; - static int ShmSize; - static int GroupRank; - static int GroupSize; - static int WorldRank; - static int WorldSize; - - std::vector WorldDims; - std::vector GroupDims; - std::vector ShmDims; - - std::vector GroupCoor; - std::vector ShmCoor; - std::vector WorldCoor; - - static std::vector GroupRanks; - static std::vector MyGroup; - static int ShmSetup; - static MPI_Win ShmWindow; - static MPI_Comm ShmComm; - - std::vector LexicographicToWorldRank; - - static std::vector ShmCommBufs; - -#else - static void ShmInitGeneric(void); - static commVector ShmBufStorageVector; -#endif - - ///////////////////////////////// - // Grid information and queries - // Implemented in Communicator_base.C - ///////////////////////////////// - static void * ShmCommBuf; - - - size_t heap_top; - size_t heap_bytes; - - void *ShmBufferSelf(void); - void *ShmBuffer(int rank); - void *ShmBufferTranslate(int rank,void * local_p); - void *ShmBufferMalloc(size_t bytes); - void ShmBufferFreeAll(void) ; + unsigned long _ndimension; + static Grid_MPI_Comm communicator_world; + Grid_MPI_Comm communicator; + std::vector communicator_halo; //////////////////////////////////////////////// // Must call in Grid startup @@ -158,13 +74,13 @@ class CartesianCommunicator { virtual ~CartesianCommunicator(); private: -#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPIT) || defined (GRID_COMMS_MPI3) + //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private //////////////////////////////////////////////// - void InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base); -#endif + void InitFromMPICommunicator(const std::vector &processors, Grid_MPI_Comm communicator_base); + public: @@ -182,8 +98,6 @@ class CartesianCommunicator { const std::vector & ThisProcessorCoor(void) ; const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; - int NodeCount(void) ; - int RankCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid @@ -271,15 +185,10 @@ class CartesianCommunicator { template void AllToAll(int dim,std::vector &in, std::vector &out){ assert(dim>=0); assert(dim<_ndimension); - int numnode = _processors[dim]; - // std::cerr << " AllToAll in.size() "< - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your 
option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include -#include - -namespace Grid { - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -MPI_Comm CartesianCommunicator::communicator_world; - -// Should error check all MPI calls. -void CartesianCommunicator::Init(int *argc, char ***argv) { - int flag; - int provided; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - if ( provided != MPI_THREAD_MULTIPLE ) { - QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; - } - } - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - ShmInitGeneric(); -} - -CartesianCommunicator::~CartesianCommunicator() -{ - int MPI_is_finalised; - MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised) - MPI_Comm_free(&communicator); -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -// Basic Halo 
comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - int myrank = _processor; - int ierr; - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - MPI_Request xrq; - MPI_Request rrq; - - ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - - assert(ierr==0); - list.push_back(xrq); - list.push_back(rrq); - } else { - // Give the CPU to MPI immediately; can use threads to overlap optionally - ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, - recv,bytes,MPI_CHAR,from, from, - communicator,MPI_STATUS_IGNORE); - assert(ierr==0); - } -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); - } -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - /////////////////////////////////////////////////////// - // Should only be used prior to Grid Init finished. - // Check for this? 
- /////////////////////////////////////////////////////// -int CartesianCommunicator::RankWorld(void){ - int r; - MPI_Comm_rank(communicator_world,&r); - return r; -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - - - -} - diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index e41749d4..ef47d617 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -26,89 +26,20 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_NUMAIF_H -#include -#endif - +#include namespace Grid { -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -int CartesianCommunicator::ShmSetup = 0; +Grid_MPI_Comm CartesianCommunicator::communicator_world; -int CartesianCommunicator::ShmRank; -int CartesianCommunicator::ShmSize; -int CartesianCommunicator::GroupRank; -int CartesianCommunicator::GroupSize; -int CartesianCommunicator::WorldRank; -int CartesianCommunicator::WorldSize; - -MPI_Comm CartesianCommunicator::communicator_world; -MPI_Comm CartesianCommunicator::ShmComm; -MPI_Win CartesianCommunicator::ShmWindow; - -std::vector CartesianCommunicator::GroupRanks; -std::vector CartesianCommunicator::MyGroup; -std::vector CartesianCommunicator::ShmCommBufs; - -int CartesianCommunicator::NodeCount(void) { return GroupSize;}; -int CartesianCommunicator::RankCount(void) { return WorldSize;}; - - -#undef FORCE_COMMS -void *CartesianCommunicator::ShmBufferSelf(void) +//////////////////////////////////////////// +// First initialise of comms system +//////////////////////////////////////////// +void CartesianCommunicator::Init(int *argc, char ***argv) { - return ShmCommBufs[ShmRank]; -} -void *CartesianCommunicator::ShmBuffer(int rank) -{ - int gpeer = GroupRanks[rank]; -#ifdef FORCE_COMMS - return NULL; -#endif - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - return ShmCommBufs[gpeer]; - } -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) -{ - static int count =0; - int gpeer = GroupRanks[rank]; - assert(gpeer!=ShmRank); // never send to self - assert(rank!=WorldRank);// never send to self -#ifdef FORCE_COMMS - return NULL; -#endif - if (gpeer == MPI_UNDEFINED){ - return NULL; - } else { - uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank]; - uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset; - return (void *) remote; - } -} - -void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; int provided; - // mtrace(); MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { @@ -119,487 +50,202 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { Grid_quiesce_nodes(); MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - MPI_Comm_rank(communicator_world,&WorldRank); - MPI_Comm_size(communicator_world,&WorldSize); - if ( WorldRank == 0 ) { - std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"< world_ranks(WorldSize); - GroupRanks.resize(WorldSize); - for(int r=0;r()); - int myleader = 
MyGroup[0]; - - std::vector leaders_1hot(WorldSize,0); - std::vector leaders_group(GroupSize,0); - leaders_1hot [ myleader ] = 1; - - /////////////////////////////////////////////////////////////////// - // global sum leaders over comm world - /////////////////////////////////////////////////////////////////// - int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world); - assert(ierr==0); - /////////////////////////////////////////////////////////////////// - // find the group leaders world rank - /////////////////////////////////////////////////////////////////// - int group=0; - for(int l=0;l shmids(ShmSize); - - if ( ShmRank == 0 ) { - for(int r=0;r coor = _processor_coor; // my coord - assert(std::abs(shift) <_processors[dim]); - - coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,source,_processors); - source = LexicographicToWorldRank[source]; - - coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,dest,_processors); - dest = LexicographicToWorldRank[dest]; - -}// rank is world rank. - + int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); + assert(ierr==0); +} int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { int rank; - Lexicographic::IndexFromCoor(coor,rank,_processors); - rank = LexicographicToWorldRank[rank]; + int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); + assert(ierr==0); return rank; -}// rank is world rank - +} void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) { - int lr=-1; - for(int r=0;r &processors) +{ + MPI_Comm optimal_comm; + GlobalSharedMemory::OptimalCommunicator (processors,optimal_comm); // Remap using the shared memory optimising routine + InitFromMPICommunicator(processors,optimal_comm); + SetCommunicator(optimal_comm); } ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -/* - * Use default in MPI compile - */ -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) - : CartesianCommunicator(processors) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) { - std::cout << "Attempts to split MPI3 communicators will fail until implemented" <= parent._ndimension); + std::vector parent_processor_coor(_ndimension,0); + std::vector parent_processors (_ndimension,1); + + // Can make 5d grid from 4d etc... 
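  // Worked example (assumed shapes, for orientation only): splitting a parent
  // 4-d grid {2,2,2,4} (32 ranks) with processors = {2,2,2,2} gives
  // ssize = {1,1,1,2}, i.e. two child grids of 16 ranks each. A parent rank at
  // coordinate {1,0,1,3} gets ccoor = {1,0,1,1} (its coordinate inside the
  // child grid) and scoor = {0,0,0,1} (which child grid it belongs to);
  // srank is then the reversed-lexicographic index of scoor within ssize.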
+ int pad = _ndimension-parent_ndimension; + for(int d=0;d ccoor(_ndimension); // coor within subcommunicator + std::vector scoor(_ndimension); // coor of split within parent + std::vector ssize(_ndimension); // coor of split within parent + + for(int d=0;d<_ndimension;d++){ + ccoor[d] = parent_processor_coor[d] % processors[d]; + scoor[d] = parent_processor_coor[d] / processors[d]; + ssize[d] = parent_processors[d] / processors[d]; + } + + // rank within subcomm ; srank is rank of subcomm within blocks of subcomms + int crank; + // Mpi uses the reverse Lexico convention to us; so reversed routines called + Lexicographic::IndexFromCoorReversed(ccoor,crank,processors); // processors is the split grid dimensions + Lexicographic::IndexFromCoorReversed(scoor,srank,ssize); // ssize is the number of split grids + + MPI_Comm comm_split; + if ( Nchild > 1 ) { + + if(0){ + std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors) -{ - int ierr; - communicator=communicator_world; - +void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) +{ _ndimension = processors.size(); + _processor_coor.resize(_ndimension); + + ///////////////////////////////// + // Count the requested nodes + ///////////////////////////////// + _Nprocessors=1; + _processors = processors; + for(int i=0;i<_ndimension;i++){ + _Nprocessors*=_processors[i]; + } + + std::vector periodic(_ndimension,1); + MPI_Cart_create(communicator_base, _ndimension,&_processors[0],&periodic[0],0,&communicator); + MPI_Comm_rank(communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); + + if ( 0 && (communicator_base != communicator_world) ) { + std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"< WorldDims = processors; - - ShmDims.resize (_ndimension,1); - GroupDims.resize(_ndimension); - ShmCoor.resize (_ndimension); - GroupCoor.resize(_ndimension); - WorldCoor.resize(_ndimension); - - int dim = 0; - for(int l2=0;l2 coor(_ndimension); - ProcessorCoorFromRank(wr,coor); // from world rank - int ck = RankFromProcessorCoor(coor); - assert(ck==wr); - - if ( wr == WorldRank ) { - for(int j=0;j mcoor = coor; - this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int)); - for(int d = 0 ; d< _ndimension; d++) { - assert(coor[d] == mcoor[d]); - } - } -}; CartesianCommunicator::~CartesianCommunicator() { int MPI_is_finalised; @@ -734,19 +380,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector row(_ndimension,1); + assert(dim>=0 && dim<_ndimension); + + // Split the communicator + row[dim] = _processors[dim]; + + int me; + CartesianCommunicator Comm(row,*this,me); + Comm.AllToAll(in,out,words,bytes); +} +void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t bytes) +{ + // MPI is a pain and uses "int" arguments + // 64*64*64*128*16 == 500Million elements of data. + // When 24*4 bytes multiples get 50x 10^9 >>> 2x10^9 Y2K bug. + // (Turns up on 32^3 x 64 Gparity too) + MPI_Datatype object; + int iwords; + int ibytes; + iwords = words; + ibytes = bytes; + assert(words == iwords); // safe to cast to int ? + assert(bytes == ibytes); // safe to cast to int ? 
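  // Why pack the bytes into a contiguous datatype (illustrative numbers taken
  // from the comment above): 64*64*64*128*16 ~= 5.4e8 words; at 24*4 = 96
  // bytes per word the byte count is ~5e10, far beyond the 2^31 limit of
  // MPI's int arguments. Sending "words" elements of a 96-byte MPI datatype
  // keeps the MPI_Alltoall count at ~5.4e8, which does fit in an int.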
+ MPI_Type_contiguous(ibytes,MPI_BYTE,&object); + MPI_Type_commit(&object); + MPI_Alltoall(in,iwords,object,out,iwords,object,communicator); + MPI_Type_free(&object); +} + + + } diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc deleted file mode 100644 index da863508..00000000 --- a/lib/communicator/Communicator_mpi3_leader.cc +++ /dev/null @@ -1,991 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include "Grid.h" -#include -//#include - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////// -/// Workarounds: -/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix. -/// darwin dispatch semaphores don't seem to be multiprocess. -/// -/// ii) openmpi under --mca shmem posix works with two squadrons per node; -/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME -/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI. 
-/// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -typedef sem_t *Grid_semaphore; - - -#error /*THis is deprecated*/ - -#if 0 -#define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); -#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); -#define SEM_POST(S) assert ( sem_post(S) == 0 ); -#define SEM_WAIT(S) assert ( sem_wait(S) == 0 ); -#else -#define SEM_INIT(S) ; -#define SEM_INIT_EXCL(S) ; -#define SEM_POST(S) ; -#define SEM_WAIT(S) ; -#endif -#include - -namespace Grid { - -enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV }; - -struct Descriptor { - uint64_t buf; - size_t bytes; - int rank; - int tag; - int command; - uint64_t xbuf; - uint64_t rbuf; - int xtag; - int rtag; - int src; - int dest; - MPI_Request request; -}; - -const int pool = 48; - -class SlaveState { -public: - volatile int head; - volatile int start; - volatile int tail; - volatile Descriptor Descrs[pool]; -}; - -class Slave { -public: - Grid_semaphore sem_head; - Grid_semaphore sem_tail; - SlaveState *state; - MPI_Comm squadron; - uint64_t base; - int universe_rank; - int vertical_rank; - char sem_name [NAME_MAX]; - //////////////////////////////////////////////////////////// - // Descriptor circular pointers - //////////////////////////////////////////////////////////// - Slave() {}; - - void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank); - - void SemInit(void) { - sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - SEM_INIT(sem_head); - sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - SEM_INIT(sem_tail); - } - void SemInitExcl(void) { - sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); - SEM_INIT_EXCL(sem_head); - sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); - SEM_INIT_EXCL(sem_tail); - } - void WakeUpDMA(void) { - SEM_POST(sem_head); - }; - void WakeUpCompute(void) { - SEM_POST(sem_tail); - }; - void WaitForCommand(void) { - SEM_WAIT(sem_head); - }; - void WaitForComplete(void) { - SEM_WAIT(sem_tail); - }; - void EventLoop (void) { - // std::cout<< " Entering event loop "<head,0,0); - int s=state->start; - if ( s != state->head ) { - _mm_mwait(0,0); - } -#endif - Event(); - } - } - - int Event (void) ; - - uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ; - void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ; - - void WaitAll() { - // std::cout << "Queueing WAIT command "<tail != state->head ); - } -}; - -//////////////////////////////////////////////////////////////////////// -// One instance of a data mover. 
-// Master and Slave must agree on location in shared memory -//////////////////////////////////////////////////////////////////////// - -class MPIoffloadEngine { -public: - - static std::vector Slaves; - - static int ShmSetup; - - static int UniverseRank; - static int UniverseSize; - - static MPI_Comm communicator_universe; - static MPI_Comm communicator_cached; - - static MPI_Comm HorizontalComm; - static int HorizontalRank; - static int HorizontalSize; - - static MPI_Comm VerticalComm; - static MPI_Win VerticalWindow; - static int VerticalSize; - static int VerticalRank; - - static std::vector VerticalShmBufs; - static std::vector > UniverseRanks; - static std::vector UserCommunicatorToWorldRanks; - - static MPI_Group WorldGroup, CachedGroup; - - static void CommunicatorInit (MPI_Comm &communicator_world, - MPI_Comm &ShmComm, - void * &ShmCommBuf); - - static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank); - - ///////////////////////////////////////////////////////// - // routines for master proc must handle any communicator - ///////////////////////////////////////////////////////// - - static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) { - // std::cout<< " Queueing send "<< bytes<< " slave "<< slave << " to comm "<= units ) { - mywork = myoff = 0; - } else { - mywork = (nwork+me)/units; - myoff = basework * me; - if ( me > backfill ) - myoff+= (me-backfill); - } - return; - }; - - static void QueueRoundRobinSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) { - uint8_t * cxbuf = (uint8_t *) xbuf; - uint8_t * crbuf = (uint8_t *) rbuf; - static int rrp=0; - int procs = VerticalSize-1; - int myoff=0; - int mywork=bytes; - QueueSendRecv(rrp+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src); - rrp = rrp+1; - if ( rrp == (VerticalSize-1) ) rrp = 0; - } - - static void QueueMultiplexedSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) { - uint8_t * cxbuf = (uint8_t *) xbuf; - uint8_t * crbuf = (uint8_t *) rbuf; - int mywork, myoff, procs; - procs = VerticalSize-1; - for(int s=0;s MPIoffloadEngine::Slaves; - -int MPIoffloadEngine::UniverseRank; -int MPIoffloadEngine::UniverseSize; - -MPI_Comm MPIoffloadEngine::communicator_universe; -MPI_Comm MPIoffloadEngine::communicator_cached; -MPI_Group MPIoffloadEngine::WorldGroup; -MPI_Group MPIoffloadEngine::CachedGroup; - -MPI_Comm MPIoffloadEngine::HorizontalComm; -int MPIoffloadEngine::HorizontalRank; -int MPIoffloadEngine::HorizontalSize; - -MPI_Comm MPIoffloadEngine::VerticalComm; -int MPIoffloadEngine::VerticalSize; -int MPIoffloadEngine::VerticalRank; -MPI_Win MPIoffloadEngine::VerticalWindow; -std::vector MPIoffloadEngine::VerticalShmBufs; -std::vector > MPIoffloadEngine::UniverseRanks; -std::vector MPIoffloadEngine::UserCommunicatorToWorldRanks; - -int CartesianCommunicator::NodeCount(void) { return HorizontalSize;}; -int MPIoffloadEngine::ShmSetup = 0; - -void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world, - MPI_Comm &ShmComm, - void * &ShmCommBuf) -{ - int flag; - assert(ShmSetup==0); - - ////////////////////////////////////////////////////////////////////// - // Universe is all nodes prior to squadron grouping - ////////////////////////////////////////////////////////////////////// - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe); - MPI_Comm_rank(communicator_universe,&UniverseRank); - 
MPI_Comm_size(communicator_universe,&UniverseSize); - - ///////////////////////////////////////////////////////////////////// - // Split into groups that can share memory (Verticals) - ///////////////////////////////////////////////////////////////////// -#undef MPI_SHARED_MEM_DEBUG -#ifdef MPI_SHARED_MEM_DEBUG - MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm); -#else - MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm); -#endif - MPI_Comm_rank(VerticalComm ,&VerticalRank); - MPI_Comm_size(VerticalComm ,&VerticalSize); - - ////////////////////////////////////////////////////////////////////// - // Split into horizontal groups by rank in squadron - ////////////////////////////////////////////////////////////////////// - MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm); - MPI_Comm_rank(HorizontalComm,&HorizontalRank); - MPI_Comm_size(HorizontalComm,&HorizontalSize); - assert(HorizontalSize*VerticalSize==UniverseSize); - - //////////////////////////////////////////////////////////////////////////////// - // What is my place in the world - //////////////////////////////////////////////////////////////////////////////// - int WorldRank=0; - if(VerticalRank==0) WorldRank = HorizontalRank; - int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm); - assert(ierr==0); - - //////////////////////////////////////////////////////////////////////////////// - // Where is the world in the universe? - //////////////////////////////////////////////////////////////////////////////// - UniverseRanks = std::vector >(HorizontalSize,std::vector(VerticalSize,0)); - UniverseRanks[WorldRank][VerticalRank] = UniverseRank; - for(int w=0;w0 ) size = sizeof(SlaveState); - - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); - - shm_unlink(shm_name); - - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); - if ( fd < 0 ) { - perror("failed shm_open"); - assert(0); - } - - ftruncate(fd, size); - - VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if ( VerticalShmBufs[r] == MAP_FAILED ) { - perror("failed mmap"); - assert(0); - } - - /* - for(uint64_t page=0;page0 ) size = sizeof(SlaveState); - - sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); - - int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); - if ( fd<0 ) { - perror("failed shm_open"); - assert(0); - } - VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - uint64_t * check = (uint64_t *) VerticalShmBufs[r]; - assert(check[0]== WorldRank); - assert(check[1]== r); - // std::cerr<<"SHM "<"<"< cached_ranks(size); - - for(int r=0;r"<>0 )&0xFFFF)^((icomm>>16)&0xFFFF) - ^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF); - - // hashtag = (comm_hash<<15) | tag; - hashtag = tag; - -}; - -void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank) -{ - squadron=_squadron; - universe_rank=_universe_rank; - vertical_rank=_vertical_rank; - state =_state; - // std::cout << "state "<<_state<<" comm "<<_squadron<<" universe_rank"<head = state->tail = state->start = 0; - base = (uint64_t)MPIoffloadEngine::VerticalShmBufs[0]; - int rank; MPI_Comm_rank(_squadron,&rank); -} -#define PERI_PLUS(A) ( (A+1)%pool ) -int Slave::Event (void) { - - static int tail_last; - static int head_last; - static int start_last; - int ierr; - MPI_Status stat; - static int i=0; - - //////////////////////////////////////////////////// - // Try to advance the start pointers - 
//////////////////////////////////////////////////// - int s=state->start; - if ( s != state->head ) { - switch ( state->Descrs[s].command ) { - case COMMAND_ISEND: - ierr = MPI_Isend((void *)(state->Descrs[s].buf+base), - state->Descrs[s].bytes, - MPI_CHAR, - state->Descrs[s].rank, - state->Descrs[s].tag, - MPIoffloadEngine::communicator_universe, - (MPI_Request *)&state->Descrs[s].request); - assert(ierr==0); - state->start = PERI_PLUS(s); - return 1; - break; - - case COMMAND_IRECV: - ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base), - state->Descrs[s].bytes, - MPI_CHAR, - state->Descrs[s].rank, - state->Descrs[s].tag, - MPIoffloadEngine::communicator_universe, - (MPI_Request *)&state->Descrs[s].request); - - // std::cout<< " Request is "<Descrs[s].request<Descrs[0].request<start = PERI_PLUS(s); - return 1; - break; - - case COMMAND_SENDRECV: - - // fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10); - - ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10, - (void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10, - MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE); - - assert(ierr==0); - - // fprintf(stderr,"Sendrecv done %d %d\n",ierr,i); - // MPI_Barrier(MPIoffloadEngine::HorizontalComm); - // fprintf(stderr,"Barrier\n"); - i++; - - state->start = PERI_PLUS(s); - - return 1; - break; - - case COMMAND_WAITALL: - - for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){ - if ( state->Descrs[t].command != COMMAND_SENDRECV ) { - MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE); - } - }; - s=PERI_PLUS(s); - state->start = s; - state->tail = s; - - WakeUpCompute(); - - return 1; - break; - - default: - assert(0); - break; - } - } - return 0; -} - ////////////////////////////////////////////////////////////////////////////// - // External interaction with the queue - ////////////////////////////////////////////////////////////////////////////// - -void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) -{ - int head =state->head; - int next = PERI_PLUS(head); - - // Set up descriptor - int worldrank; - int hashtag; - MPI_Comm communicator; - MPI_Request request; - uint64_t relative; - - relative = (uint64_t)xbuf - base; - state->Descrs[head].xbuf = relative; - - relative= (uint64_t)rbuf - base; - state->Descrs[head].rbuf = relative; - - state->Descrs[head].bytes = bytes; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest); - state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].xtag = hashtag; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src); - state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].rtag = hashtag; - - state->Descrs[head].command= COMMAND_SENDRECV; - - // Block until FIFO has space - while( state->tail==next ); - - // Msync on weak order architectures - - // Advance pointer - state->head = next; - -}; -uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank) -{ - ///////////////////////////////////////// - // Spin; if FIFO is full until not full - ///////////////////////////////////////// - int head =state->head; - int next = PERI_PLUS(head); - - // 
Set up descriptor - int worldrank; - int hashtag; - MPI_Comm communicator; - MPI_Request request; - - MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank); - - uint64_t relative= (uint64_t)buf - base; - state->Descrs[head].buf = relative; - state->Descrs[head].bytes = bytes; - state->Descrs[head].rank = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; - state->Descrs[head].tag = hashtag; - state->Descrs[head].command= command; - - /* - if ( command == COMMAND_ISEND ) { - std::cout << "QueueSend from "<< universe_rank <<" to commrank " << commrank - << " to worldrank " << worldrank <tail==next ); - - // Msync on weak order architectures - // Advance pointer - state->head = next; - - return 0; -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// - -MPI_Comm CartesianCommunicator::communicator_world; - -void CartesianCommunicator::Init(int *argc, char ***argv) -{ - int flag; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init(argc,argv); - } - communicator_world = MPI_COMM_WORLD; - MPI_Comm ShmComm; - MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - int Size; - MPI_Comm_size(communicator_world,&Size); - assert(Size==_Nprocessors); - - _processor_coor.resize(_ndimension); - MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); - MPI_Comm_rank (communicator,&_processor); - MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); -}; - -CartesianCommunicator::~CartesianCommunicator() = default; - - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); 
- assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - MPI_Request xrq; - MPI_Request rrq; - int rank = _processor; - int ierr; - ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - - assert(ierr==0); - - list.push_back(xrq); - list.push_back(rrq); -} - -void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - uint64_t xmit_i = (uint64_t) xmit; - uint64_t recv_i = (uint64_t) recv; - uint64_t shm = (uint64_t) ShmCommBuf; - // assert xmit and recv lie in shared memory region - assert( (xmit_i >= shm) && (xmit_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); - assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); - assert(from!=_processor); - assert(dest!=_processor); - - MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); - - //MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from); - - //MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest); - //MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from); -} - -void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) -{ - MPIoffloadEngine::WaitAll(); - //this->Barrier(); -} - -void CartesianCommunicator::StencilBarrier(void) { } - -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - -void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } - -void *CartesianCommunicator::ShmBuffer(int rank) { - return NULL; -} -void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { - return NULL; -} - - -}; - diff --git a/lib/communicator/Communicator_mpit.cc b/lib/communicator/Communicator_mpit.cc deleted file mode 100644 index bceea0d8..00000000 --- a/lib/communicator/Communicator_mpit.cc +++ /dev/null @@ -1,273 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_mpi.cc - - 
Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include -#include - -namespace Grid { - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// -MPI_Comm CartesianCommunicator::communicator_world; - -// Should error check all MPI calls. -void CartesianCommunicator::Init(int *argc, char ***argv) { - int flag; - int provided; - MPI_Initialized(&flag); // needed to coexist with other libs apparently - if ( !flag ) { - MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); - if ( provided != MPI_THREAD_MULTIPLE ) { - QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute; - } - } - MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); - ShmInitGeneric(); -} - -CartesianCommunicator::~CartesianCommunicator() -{ - int MPI_is_finalised; - MPI_Finalized(&MPI_is_finalised); - if (communicator && !MPI_is_finalised){ - MPI_Comm_free(&communicator); - for(int i=0;i< communicator_halo.size();i++){ - MPI_Comm_free(&communicator_halo[i]); - } - } -} - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint32_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalXOR(uint64_t &u){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(float &f){ - int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSum(double &d) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); - assert(ierr==0); -} -void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) -{ - int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); - assert(ierr==0); -} -int 
CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); - assert(ierr==0); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - coor.resize(_ndimension); - int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); - assert(ierr==0); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - MPI_Status stat; - assert(sender != receiver); - int tag = sender; - if ( _processor == sender ) { - MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); - } - if ( _processor == receiver ) { - MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - int myrank = _processor; - int ierr; - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - MPI_Request xrq; - MPI_Request rrq; - - ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); - ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); - - assert(ierr==0); - list.push_back(xrq); - list.push_back(rrq); - } else { - // Give the CPU to MPI immediately; can use threads to overlap optionally - ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank, - recv,bytes,MPI_CHAR,from, from, - communicator,MPI_STATUS_IGNORE); - assert(ierr==0); - } -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) { - int nreq=list.size(); - std::vector status(nreq); - int ierr = MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); - } -} - -void CartesianCommunicator::Barrier(void) -{ - int ierr = MPI_Barrier(communicator); - assert(ierr==0); -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - int ierr=MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator); - assert(ierr==0); -} - /////////////////////////////////////////////////////// - // Should only be used prior to Grid Init finished. - // Check for this? 
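
For context on the RankFromProcessorCoor/ProcessorCoorFromRank pair deleted in this hunk: both are thin wrappers over the MPI Cartesian-topology calls, which presuppose a communicator created with MPI_Cart_create. A small self-contained sketch, independent of Grid:

#include <mpi.h>
#include <cassert>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  const int ndim = 2;
  int size;
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  std::vector<int> dims(ndim, 0), periodic(ndim, 1);
  MPI_Dims_create(size, ndim, dims.data());   // factor `size` into a 2-d grid

  MPI_Comm cart;
  MPI_Cart_create(MPI_COMM_WORLD, ndim, dims.data(), periodic.data(), 1, &cart);

  int rank;
  MPI_Comm_rank(cart, &rank);

  std::vector<int> coor(ndim);
  MPI_Cart_coords(cart, rank, ndim, coor.data());  // rank -> coordinate
  int back;
  MPI_Cart_rank(cart, coor.data(), &back);         // coordinate -> rank
  assert(back == rank);

  MPI_Finalize();
  return 0;
}
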
- /////////////////////////////////////////////////////// -int CartesianCommunicator::RankWorld(void){ - int r; - MPI_Comm_rank(communicator_world,&r); - return r; -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ - int ierr= MPI_Bcast(data, - bytes, - MPI_BYTE, - root, - communicator_world); - assert(ierr==0); -} - -double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes,int dir) -{ - int myrank = _processor; - int ierr; - int ncomm =communicator_halo.size(); - int commdir=dir%ncomm; - - // std::cout << " sending on communicator "< &waitall,int dir) -{ - int nreq=waitall.size(); - MPI_Waitall(nreq, &waitall[0], MPI_STATUSES_IGNORE); -} -double CartesianCommunicator::StencilSendToRecvFrom(void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes,int dir) -{ - int myrank = _processor; - int ierr; - // std::cout << " sending on communicator "< &processors,const CartesianCommunicator &parent,int &srank) - : CartesianCommunicator(processors) { srank=0;} + : CartesianCommunicator(processors) +{ + srank=0; + SetCommunicator(communicator_world); +} CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { @@ -54,6 +62,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(_processors[d]==1); _processor_coor[d] = 0; } + SetCommunicator(communicator_world); } CartesianCommunicator::~CartesianCommunicator(){} @@ -121,6 +130,36 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest dest=0; } +double CartesianCommunicator::StencilSendToRecvFrom( void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + std::vector list; + // Discard the "dir" + SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + SendToRecvFromComplete(list); + return 2.0*bytes; +} +double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes, int dir) +{ + // Discard the "dir" + SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); + return 2.0*bytes; +} +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall,int dir) +{ + SendToRecvFromComplete(waitall); +} + +void CartesianCommunicator::StencilBarrier(void){}; + } diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc deleted file mode 100644 index d63a5af4..00000000 --- a/lib/communicator/Communicator_shmem.cc +++ /dev/null @@ -1,360 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/communicator/Communicator_shmem.cc - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
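
The "mpit" stencil path deleted above posts each direction on communicator_halo[dir % ncomm], i.e. on one of several copies of the Cartesian communicator, and returns 2.0*bytes so callers can count send plus receive traffic. How Grid constructed communicator_halo is not shown in this patch; the sketch below uses MPI_Comm_dup as one plausible way to build such a set, with illustrative names only.

#include <mpi.h>
#include <vector>

// Duplicate the parent communicator `ncomm` times; direction `dir`
// would then post its traffic on halo[dir % ncomm], as in the deleted hunk.
std::vector<MPI_Comm> make_halo_comms(MPI_Comm parent, int ncomm)
{
  std::vector<MPI_Comm> halo(ncomm);
  for (int i = 0; i < ncomm; i++) MPI_Comm_dup(parent, &halo[i]);
  return halo;
}
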
- - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include -#include - -namespace Grid { - - // Should error check all MPI calls. -#define SHMEM_VET(addr) - -#define SHMEM_VET_DEBUG(addr) { \ - if ( ! shmem_addr_accessible(addr,_processor) ) {\ - std::fprintf(stderr,"%d Inaccessible shmem address %lx %s %s\n",_processor,addr,__FUNCTION__,#addr); \ - BACKTRACEFILE(); \ - }\ -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Info that is setup once and indept of cartesian layout -/////////////////////////////////////////////////////////////////////////////////////////////////// - -typedef struct HandShake_t { - uint64_t seq_local; - uint64_t seq_remote; -} HandShake; - -std::array make_psync_init(void) { - std::array ret; - ret.fill(SHMEM_SYNC_VALUE); - return ret; -} -static std::array psync_init = make_psync_init(); - -static Vector< HandShake > XConnections; -static Vector< HandShake > RConnections; - -void CartesianCommunicator::Init(int *argc, char ***argv) { - shmem_init(); - XConnections.resize(shmem_n_pes()); - RConnections.resize(shmem_n_pes()); - for(int pe =0 ; pe &processors,const CartesianCommunicator &parent) - : CartesianCommunicator(processors) -{ - std::cout << "Attempts to split SHMEM communicators will fail " < &processors) -{ - _ndimension = processors.size(); - std::vector periodic(_ndimension,1); - - _Nprocessors=1; - _processors = processors; - _processor_coor.resize(_ndimension); - - _processor = shmem_my_pe(); - - Lexicographic::CoorFromIndex(_processor_coor,_processor,_processors); - - for(int i=0;i<_ndimension;i++){ - _Nprocessors*=_processors[i]; - } - - int Size = shmem_n_pes(); - - - assert(Size==_Nprocessors); -} - -CartesianCommunicator::~CartesianCommunicator() = default; - - -void CartesianCommunicator::GlobalSum(uint32_t &u){ - static long long source ; - static long long dest ; - static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - // int nreduce=1; - // int pestart=0; - // int logStride=0; - - source = u; - dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); // necessary? - u = dest; -} -void CartesianCommunicator::GlobalSum(uint64_t &u){ - static long long source ; - static long long dest ; - static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - // int nreduce=1; - // int pestart=0; - // int logStride=0; - - source = u; - dest = 0; - shmem_longlong_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); // necessary? 
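
The deleted SHMEM constructor in this hunk derives the processor coordinate from the PE number with Lexicographic::CoorFromIndex, and ShiftedRanks below inverts it with IndexFromCoor. As a reminder of what that mapping does, here is a small standalone sketch, assuming the first coordinate runs fastest; these helpers are illustrative, not Grid's Lexicographic class.

#include <cassert>
#include <vector>

// index -> coordinate, first dimension fastest
void coor_from_index(std::vector<int> &coor, int index, const std::vector<int> &dims)
{
  coor.resize(dims.size());
  for (size_t d = 0; d < dims.size(); d++) {
    coor[d] = index % dims[d];
    index   = index / dims[d];
  }
}

// coordinate -> index, inverse of the above
void index_from_coor(const std::vector<int> &coor, int &index, const std::vector<int> &dims)
{
  index = 0;
  for (int d = (int)dims.size() - 1; d >= 0; d--) index = index * dims[d] + coor[d];
}

int main()
{
  std::vector<int> dims = {2, 3, 4};
  for (int r = 0; r < 2 * 3 * 4; r++) {
    std::vector<int> coor;
    int back;
    coor_from_index(coor, r, dims);
    index_from_coor(coor, back, dims);
    assert(back == r);   // the two mappings are inverses
  }
  return 0;
}
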
- u = dest; -} -void CartesianCommunicator::GlobalSum(float &f){ - static float source ; - static float dest ; - static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - source = f; - dest =0.0; - shmem_float_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - f = dest; -} -void CartesianCommunicator::GlobalSumVector(float *f,int N) -{ - static float source ; - static float dest = 0 ; - static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - if ( shmem_addr_accessible(f,_processor) ){ - shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - return; - } - - for(int i=0;i psync = psync_init; - - source = d; - dest = 0; - shmem_double_sum_to_all(&dest,&source,1,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - d = dest; -} -void CartesianCommunicator::GlobalSumVector(double *d,int N) -{ - static double source ; - static double dest ; - static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static std::array psync = psync_init; - - - if ( shmem_addr_accessible(d,_processor) ){ - shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync.data()); - shmem_barrier_all(); - return; - } - - for(int i=0;i coor = _processor_coor; - - assert(std::abs(shift) <_processors[dim]); - - coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,source,_processors); - - coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; - Lexicographic::IndexFromCoor(coor,dest,_processors); - -} -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - int rank; - Lexicographic::IndexFromCoor(coor,rank,_processors); - return rank; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ - Lexicographic::CoorFromIndex(coor,rank,_processors); -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - SHMEM_VET(xmit); - SHMEM_VET(recv); - std::vector reqs(0); - SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); - SendToRecvFromComplete(reqs); -} - -void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int sender, - int receiver, - int bytes) -{ - static uint64_t seq; - - assert(recv!=xmit); - volatile HandShake *RecvSeq = (volatile HandShake *) & RConnections[sender]; - volatile HandShake *SendSeq = (volatile HandShake *) & XConnections[receiver]; - - if ( _processor == sender ) { - - // Check he has posted a receive - while(SendSeq->seq_remote == SendSeq->seq_local); - - // Advance our send count - seq = ++(SendSeq->seq_local); - - // Send this packet - SHMEM_VET(recv); - shmem_putmem(recv,xmit,bytes,receiver); - shmem_fence(); - - //Notify him we're done - shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver); - shmem_fence(); - } - if ( _processor == receiver ) { - - // Post a receive - seq = ++(RecvSeq->seq_local); - shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender); - - // Now wait until he has advanced our reception counter - while(RecvSeq->seq_remote != RecvSeq->seq_local); - - } -} - -// Basic Halo comms primitive -void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) -{ - SHMEM_VET(xmit); - SHMEM_VET(recv); - // shmem_putmem_nb(recv,xmit,bytes,dest,NULL); - shmem_putmem(recv,xmit,bytes,dest); - - if ( 
CommunicatorPolicy == CommunicatorPolicySequential ) shmem_barrier_all(); -} -void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) -{ - // shmem_quiet(); // I'm done - if( CommunicatorPolicy == CommunicatorPolicyConcurrent ) shmem_barrier_all();// He's done too -} -void CartesianCommunicator::Barrier(void) -{ - shmem_barrier_all(); -} -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ - static std::array psync = psync_init; - static uint32_t word; - uint32_t *array = (uint32_t *) data; - assert( (bytes % 4)==0); - int words = bytes/4; - - if ( shmem_addr_accessible(data,_processor) ){ - shmem_broadcast32(data,data,words,root,0,0,shmem_n_pes(),psync.data()); - return; - } - - for(int w=0;w psync = psync_init; - static uint32_t word; - uint32_t *array = (uint32_t *) data; - assert( (bytes % 4)==0); - int words = bytes/4; - - for(int w=0;w + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +// static data + +uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; +int GlobalSharedMemory::Hugepages = 0; +int GlobalSharedMemory::_ShmSetup; +int GlobalSharedMemory::_ShmAlloc; +uint64_t GlobalSharedMemory::_ShmAllocBytes; + +std::vector GlobalSharedMemory::WorldShmCommBufs; + +Grid_MPI_Comm GlobalSharedMemory::WorldShmComm; +int GlobalSharedMemory::WorldShmRank; +int GlobalSharedMemory::WorldShmSize; +std::vector GlobalSharedMemory::WorldShmRanks; + +Grid_MPI_Comm GlobalSharedMemory::WorldComm; +int GlobalSharedMemory::WorldSize; +int GlobalSharedMemory::WorldRank; + +int GlobalSharedMemory::WorldNodes; +int GlobalSharedMemory::WorldNode; + +void GlobalSharedMemory::SharedMemoryFree(void) +{ + assert(_ShmAlloc); + assert(_ShmAllocBytes>0); + for(int r=0;r= heap_size) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
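
ShmBufferMalloc in the new SharedMemory.cc above is a bump allocator over the pre-mapped shared region: it hands out heap_top, advances it, and only ever releases everything at once via ShmBufferFreeAll. A minimal standalone sketch of that scheme, with illustrative names rather than Grid's class:

#include <cstddef>
#include <cstdint>
#include <stdexcept>

struct BumpHeap {
  uint8_t *base;     // start of the pre-mapped shared region
  size_t   size;     // bytes available, e.g. the --shm setting
  size_t   top = 0;  // bytes handed out so far

  void *alloc(size_t bytes) {
    if (top + bytes > size)
      throw std::runtime_error("shared heap exhausted; increase --shm");
    void *p = base + top;
    top += bytes;    // no per-allocation free; only reset-all below
    return p;
  }

  void free_all() { top = 0; }  // mirrors ShmBufferFreeAll()
};
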
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + + +// TODO +// 1) move includes into SharedMemory.cc +// +// 2) split shared memory into a) optimal communicator creation from comm world +// +// b) shared memory buffers container +// -- static globally shared; init once +// -- per instance set of buffers. +// + +#pragma once + +#include + +#if defined (GRID_COMMS_MPI3) +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_NUMAIF_H +#include +#endif + +namespace Grid { + +#if defined (GRID_COMMS_MPI3) + typedef MPI_Comm Grid_MPI_Comm; + typedef MPI_Request CommsRequest_t; +#else + typedef int CommsRequest_t; + typedef int Grid_MPI_Comm; +#endif + +class GlobalSharedMemory { + private: + static const int MAXLOG2RANKSPERNODE = 16; + + // Init once lock on the buffer allocation + static int _ShmSetup; + static int _ShmAlloc; + static uint64_t _ShmAllocBytes; + + public: + static int ShmSetup(void) { return _ShmSetup; } + static int ShmAlloc(void) { return _ShmAlloc; } + static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; } + static uint64_t MAX_MPI_SHM_BYTES; + static int Hugepages; + + static std::vector WorldShmCommBufs; + + static Grid_MPI_Comm WorldComm; + static int WorldRank; + static int WorldSize; + + static Grid_MPI_Comm WorldShmComm; + static int WorldShmRank; + static int WorldShmSize; + + static int WorldNodes; + static int WorldNode; + + static std::vector WorldShmRanks; + + ////////////////////////////////////////////////////////////////////////////////////// + // Create an optimal reordered communicator that makes MPI_Cart_create get it right + ////////////////////////////////////////////////////////////////////////////////////// + static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD + static void OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + /////////////////////////////////////////////////// + // Provide shared memory facilities off comm world + /////////////////////////////////////////////////// + static void SharedMemoryAllocate(uint64_t bytes, int flags); + static void SharedMemoryFree(void); + +}; + +////////////////////////////// +// one per communicator +////////////////////////////// +class SharedMemory +{ + private: + static const int MAXLOG2RANKSPERNODE = 16; + + size_t heap_top; + size_t heap_bytes; + size_t heap_size; + + protected: + + Grid_MPI_Comm ShmComm; // for barriers + int ShmRank; + int ShmSize; + std::vector ShmCommBufs; + std::vector ShmRanks;// Mapping comm ranks to Shm ranks + + public: + SharedMemory() {}; + /////////////////////////////////////////////////////////////////////////////////////// + // set the buffers & sizes + /////////////////////////////////////////////////////////////////////////////////////// + void SetCommunicator(Grid_MPI_Comm comm); + + //////////////////////////////////////////////////////////////////////// + // For this instance ; disjoint buffer sets between splits if split grid + //////////////////////////////////////////////////////////////////////// + void ShmBarrier(void); + + /////////////////////////////////////////////////// + // Call on any instance + /////////////////////////////////////////////////// + void SharedMemoryTest(void); + void *ShmBufferSelf(void); + void *ShmBuffer (int rank); + void 
*ShmBufferTranslate(int rank,void * local_p); + void *ShmBufferMalloc(size_t bytes); + void ShmBufferFreeAll(void) ; + + ////////////////////////////////////////////////////////////////////////// + // Make info on Nodes & ranks and Shared memory available + ////////////////////////////////////////////////////////////////////////// + int NodeCount(void) { return GlobalSharedMemory::WorldNodes;}; + int RankCount(void) { return GlobalSharedMemory::WorldSize;}; + +}; + +} diff --git a/lib/communicator/SharedMemoryMPI.cc b/lib/communicator/SharedMemoryMPI.cc new file mode 100644 index 00000000..d7bd7c65 --- /dev/null +++ b/lib/communicator/SharedMemoryMPI.cc @@ -0,0 +1,395 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/SharedMemory.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +/*Construct from an MPI communicator*/ +void GlobalSharedMemory::Init(Grid_MPI_Comm comm) +{ + assert(_ShmSetup==0); + WorldComm = comm; + MPI_Comm_rank(WorldComm,&WorldRank); + MPI_Comm_size(WorldComm,&WorldSize); + // WorldComm, WorldSize, WorldRank + + ///////////////////////////////////////////////////////////////////// + // Split into groups that can share memory + ///////////////////////////////////////////////////////////////////// + MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm); + MPI_Comm_rank(WorldShmComm ,&WorldShmRank); + MPI_Comm_size(WorldShmComm ,&WorldShmSize); + // WorldShmComm, WorldShmSize, WorldShmRank + + // WorldNodes + WorldNodes = WorldSize/WorldShmSize; + assert( (WorldNodes * WorldShmSize) == WorldSize ); + + // FIXME: Check all WorldShmSize are the same ? + + ///////////////////////////////////////////////////////////////////// + // find world ranks in our SHM group (i.e. 
which ranks are on our node) + ///////////////////////////////////////////////////////////////////// + MPI_Group WorldGroup, ShmGroup; + MPI_Comm_group (WorldComm, &WorldGroup); + MPI_Comm_group (WorldShmComm, &ShmGroup); + + std::vector world_ranks(WorldSize); for(int r=0;r MyGroup; + MyGroup.resize(WorldShmSize); + for(int rank=0;rank()); + int myleader = MyGroup[0]; + + std::vector leaders_1hot(WorldSize,0); + std::vector leaders_group(WorldNodes,0); + leaders_1hot [ myleader ] = 1; + + /////////////////////////////////////////////////////////////////// + // global sum leaders over comm world + /////////////////////////////////////////////////////////////////// + int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,WorldComm); + assert(ierr==0); + + /////////////////////////////////////////////////////////////////// + // find the group leaders world rank + /////////////////////////////////////////////////////////////////// + int group=0; + for(int l=0;l &processors,Grid_MPI_Comm & optimal_comm) +{ + //////////////////////////////////////////////////////////////// + // Assert power of two shm_size. + //////////////////////////////////////////////////////////////// + int log2size = -1; + for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){ + if ( (0x1< processor_coor(ndimension); + std::vector WorldDims = processors; std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); + std::vector ShmCoor (ndimension); std::vector NodeCoor (ndimension); std::vector WorldCoor(ndimension); + int dim = 0; + for(int l2=0;l2 ranks(size); for(int r=0;r + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
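
GlobalSharedMemory::Init above numbers the nodes by having every rank mark its node leader (the lowest world rank in its shared-memory group) in a one-hot vector, summing those vectors with an Allreduce, and treating the non-zero entries as the nodes in world-rank order. A sketch of just that step in standard MPI, with an illustrative function name:

#include <mpi.h>
#include <vector>

// Count nodes and return this rank's node index, given the world rank of
// the leader of the shared-memory group this rank belongs to.
int node_index(MPI_Comm world, int my_leader_world_rank, int &nnodes)
{
  int size;
  MPI_Comm_size(world, &size);

  std::vector<int> onehot(size, 0);
  onehot[my_leader_world_rank] = 1;
  MPI_Allreduce(MPI_IN_PLACE, onehot.data(), size, MPI_INT, MPI_SUM, world);

  nnodes = 0;
  int mynode = -1;
  for (int r = 0; r < size; r++) {
    if (onehot[r]) {
      if (r == my_leader_world_rank) mynode = nnodes;
      nnodes++;
    }
  }
  return mynode;
}
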
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + +namespace Grid { + +/*Construct from an MPI communicator*/ +void GlobalSharedMemory::Init(Grid_MPI_Comm comm) +{ + assert(_ShmSetup==0); + WorldComm = 0; + WorldRank = 0; + WorldSize = 1; + WorldShmComm = 0 ; + WorldShmRank = 0 ; + WorldShmSize = 1 ; + WorldNodes = 1 ; + WorldNode = 0 ; + WorldShmRanks.resize(WorldSize); WorldShmRanks[0] = 0; + WorldShmCommBufs.resize(1); + _ShmSetup=1; +} + +void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +{ + optimal_comm = WorldComm; +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// Hugetlbfs mapping intended, use anonymous mmap +//////////////////////////////////////////////////////////////////////////////////////////// +void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) +{ + void * ShmCommBuf ; + assert(_ShmSetup==1); + assert(_ShmAlloc==0); + int mmap_flag =0; +#ifdef MAP_ANONYMOUS + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANONYMOUS; +#endif +#ifdef MAP_ANON + mmap_flag = mmap_flag| MAP_SHARED | MAP_ANON; +#endif +#ifdef MAP_HUGETLB + if ( flags ) mmap_flag |= MAP_HUGETLB; +#endif + ShmCommBuf =(void *) mmap(NULL, bytes, PROT_READ | PROT_WRITE, mmap_flag, -1, 0); + if (ShmCommBuf == (void *)MAP_FAILED) { + perror("mmap failed "); + exit(EXIT_FAILURE); + } +#ifdef MADV_HUGEPAGE + if (!Hugepages ) madvise(ShmCommBuf,bytes,MADV_HUGEPAGE); +#endif + bzero(ShmCommBuf,bytes); + WorldShmCommBufs[0] = ShmCommBuf; + _ShmAllocBytes=bytes; + _ShmAlloc=1; +}; + + //////////////////////////////////////////////////////// + // Global shared functionality finished + // Now move to per communicator functionality + //////////////////////////////////////////////////////// +void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) +{ + assert(GlobalSharedMemory::ShmAlloc()==1); + ShmRanks.resize(1); + ShmCommBufs.resize(1); + ShmRanks[0] = 0; + ShmRank = 0; + ShmSize = 1; + ////////////////////////////////////////////////////////////////////// + // Map ShmRank to WorldShmRank and use the right buffer + ////////////////////////////////////////////////////////////////////// + ShmCommBufs[0] = GlobalSharedMemory::WorldShmCommBufs[0]; + heap_size = GlobalSharedMemory::ShmAllocBytes(); + ShmBufferFreeAll(); + return; +} +////////////////////////////////////////////////////////////////// +// On node barrier +////////////////////////////////////////////////////////////////// +void SharedMemory::ShmBarrier(void){ return ; } + +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Test the shared memory is working +////////////////////////////////////////////////////////////////////////////////////////////////////////// +void SharedMemory::SharedMemoryTest(void) { return; } + +void *SharedMemory::ShmBuffer(int rank) +{ + return NULL; +} +void *SharedMemory::ShmBufferTranslate(int rank,void * local_p) +{ + return NULL; +} + +} diff --git a/lib/qcd/action/fermion/WilsonCompressor.h b/lib/qcd/action/fermion/WilsonCompressor.h index cc5c3c63..b47700ac 100644 --- a/lib/qcd/action/fermion/WilsonCompressor.h +++ b/lib/qcd/action/fermion/WilsonCompressor.h @@ -265,7 +265,6 @@ public: if ( timer3 ) std::cout << GridLogMessage << " timer3 (commsMergeShm) " < same_node; std::vector surface_list; diff 
--git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index 887d8a7c..69c010f4 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -105,7 +105,6 @@ template class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. public: - typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; typedef typename cobj::vector_type vector_type; typedef typename cobj::scalar_type scalar_type; typedef typename cobj::scalar_object scalar_object; diff --git a/lib/util/Init.cc b/lib/util/Init.cc index 20367293..fb3d7a1e 100644 --- a/lib/util/Init.cc +++ b/lib/util/Init.cc @@ -220,11 +220,11 @@ void Grid_init(int *argc,char ***argv) arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); GridCmdOptionInt(arg,MB); uint64_t MB64 = MB; - CartesianCommunicator::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; + GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL; } if( GridCmdOptionExists(*argv,*argv+*argc,"--shm-hugepages") ){ - CartesianCommunicator::Hugepages = 1; + GlobalSharedMemory::Hugepages = 1; } @@ -398,8 +398,8 @@ void Grid_init(int *argc,char ***argv) Grid_default_latt, Grid_default_mpi); - std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "< Make.inc echo >> Make.inc diff --git a/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc new file mode 100644 index 00000000..132dff4e --- /dev/null +++ b/tests/lanczos/Test_dwf_compressed_lanczos_reorg_synthetic.cc @@ -0,0 +1,330 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
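
The Init.cc hunk above redirects the --shm and --shm-hugepages options from CartesianCommunicator to GlobalSharedMemory; the --shm value is given in megabytes and widened to 64 bits before the multiply so requests of 4 GB or more do not overflow. A reduced, illustrative-only sketch of that conversion (not Grid's GridCmdOption helpers):

#include <cstdint>
#include <cstdlib>
#include <cstring>

// Scan argv for "--shm <MB>" and return the requested size in bytes,
// falling back to `default_bytes` (e.g. 1024LL*1024LL*1024LL) otherwise.
uint64_t parse_shm_bytes(int argc, char **argv, uint64_t default_bytes)
{
  for (int i = 1; i + 1 < argc; i++) {
    if (std::strcmp(argv[i], "--shm") == 0) {
      uint64_t MB = std::strtoull(argv[i + 1], nullptr, 10);
      return MB * 1024ULL * 1024ULL;   // 64-bit arithmetic throughout
    }
  }
  return default_bytes;
}
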
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class ProjectedHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedHermOp(LinearOperatorBase& linop, Aggregation &aggregate) : + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + FineField fin(FineGrid); + FineField fout(FineGrid); + + _Aggregate.PromoteFromSubspace(in,fin); + _Linop.HermOp(fin,fout); + _Aggregate.ProjectToSubspace(out,fout); + } +}; + +template +class ProjectedFunctionHermOp : public LinearFunction > > { +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + Aggregation &_Aggregate; + + ProjectedFunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop, + Aggregation &aggregate) : + _poly(poly), + _Linop(linop), + _Aggregate(aggregate) { }; + + void operator()(const CoarseField& in, CoarseField& out) { + + GridBase *FineGrid = _Aggregate.FineGrid; + + FineField fin(FineGrid) ;fin.checkerboard =_Aggregate.checkerboard; + FineField fout(FineGrid);fout.checkerboard =_Aggregate.checkerboard; + + _Aggregate.PromoteFromSubspace(in,fin); + _poly(_Linop,fin,fout); + _Aggregate.ProjectToSubspace(out,fout); + } +}; + +// Make serializable Lanczos params + +template +class CoarseFineIRL +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice CoarseField; + typedef Lattice FineField; + +private: + GridBase *_CoarseGrid; + GridBase *_FineGrid; + int _checkerboard; + LinearOperatorBase & _FineOp; + Aggregation _Aggregate; + +public: + CoarseFineIRL(GridBase *FineGrid, + GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) : + _CoarseGrid(CoarseGrid), + _FineGrid(FineGrid), + _Aggregate(CoarseGrid,FineGrid,checkerboard), + _FineOp(FineOp), + _checkerboard(checkerboard) + {}; + + template static RealD normalise(T& v) + { + RealD nn = norm2(v); + nn = ::sqrt(nn); + v = v * (1.0/nn); + return nn; + } + + void testFine(void) + { + int Nk = nbasis; + _Aggregate.subspace.resize(Nk,_FineGrid); + _Aggregate.subspace[0]=1.0; + _Aggregate.subspace[0].checkerboard=_checkerboard; + normalise(_Aggregate.subspace[0]); + PlainHermOp Op(_FineOp); + for(int k=1;k Cheby(alpha,beta,Npoly); + FunctionHermOp ChebyOp(Cheby,_FineOp); + PlainHermOp Op(_FineOp); + + int Nk = nbasis; + + std::vector eval(Nm); + + FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + + ImplicitlyRestartedLanczos IRL(ChebyOp,Op,Nk,Nk,Nm,resid,MaxIt,betastp,MinRes); + _Aggregate.subspace.resize(Nm,_FineGrid); + IRL.calc(eval,_Aggregate.subspace,src,Nk,false); + _Aggregate.subspace.resize(Nk,_FineGrid); + for(int k=0;k 
Cheby(alpha,beta,Npoly); + ProjectedHermOp Op(_FineOp,_Aggregate); + ProjectedFunctionHermOp ChebyOp(Cheby,_FineOp,_Aggregate); + + std::vector eval(Nm); + std::vector evec(Nm,_CoarseGrid); + + CoarseField src(_CoarseGrid); src=1.0; + + ImplicitlyRestartedLanczos IRL(ChebyOp,ChebyOp,Nk,Nk,Nm,resid,MaxIt,betastp,MinRes); + IRL.calc(eval,evec,src,Nk,false); + + // We got the evalues of the Cheby operator; + // Reconstruct eigenvalues of original operator via Chebyshev inverse + for (int i=0;i, blockSize, + std::string, config, + std::vector < std::complex >, omega, + RealD, mass, + RealD, M5 + ); +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + + CompressedLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d seeds4({1,2,3,4}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + SU3::HotConfiguration(RNG4, Umu); + } + std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl; + + // ZMobius EO Operator + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + const int Nm1 = fine.Nm; + const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nk << " full vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nk << " total vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 70; + CoarseFineIRL IRL(FrbGrid,CoarseGrid5rb,HermOp,Odd); + + std::cout << GridLogMessage << "Constructed CoarseFine IRL" << std::endl; + + std::cout << GridLogMessage << "Performing fine grid IRL Nk "<< nbasis<<" Nm "<