Merge branch 'develop' into feature/gpu-port

2025-09-19 17:51:04 +01:00 · 2019-07-16 11:55:17 +01:00
parent 7c11525d1a c3d0c176ab
commit fa9cd50c5b
274 changed files with 7120 additions and 4663 deletions
--- a/Grid/communicator/Communicator_mpi3.cc
+++ b/Grid/communicator/Communicator_mpi3.cc
@@ -44,10 +44,13 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
  MPI_Initialized(&flag); // needed to coexist with other libs apparently
  if ( !flag ) {
    MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
-    //    assert (provided == MPI_THREAD_MULTIPLE);
+
    //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE
-    if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) ||
-        (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) ) {
+    if( (nCommThreads == 1) && (provided == MPI_THREAD_SINGLE) ) {
+      assert(0);
+    }
+
+    if( (nCommThreads > 1) && (provided != MPI_THREAD_MULTIPLE) ) {
      assert(0);
    }
  }
@@ -55,9 +58,12 @@ void CartesianCommunicator::Init(int *argc, char ***argv)
  // Never clean up as done once.
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);

+  Grid_quiesce_nodes();
  GlobalSharedMemory::Init(communicator_world);
-  GlobalSharedMemory::SharedMemoryAllocate(GlobalSharedMemory::MAX_MPI_SHM_BYTES,
-					   GlobalSharedMemory::Hugepages);
+  GlobalSharedMemory::SharedMemoryAllocate(
+		   GlobalSharedMemory::MAX_MPI_SHM_BYTES,
+		   GlobalSharedMemory::Hugepages);
+  Grid_unquiesce_nodes();
 }

 ///////////////////////////////////////////////////////////////////////////
@@ -106,8 +112,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
 //////////////////////////////////
 CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank)    
 {
-  _ndimension = processors.size();
-
+  _ndimension = processors.size();  assert(_ndimension>=1);
  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
  Coordinate parent_processor_coor(_ndimension,0);
  Coordinate parent_processors    (_ndimension,1);
--- a/Grid/communicator/Communicator_none.cc
+++ b/Grid/communicator/Communicator_none.cc
@@ -52,7 +52,7 @@ CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const
 CartesianCommunicator::CartesianCommunicator(const Coordinate &processors)
 {
  _processors = processors;
-  _ndimension = processors.size();
+  _ndimension = processors.size();  assert(_ndimension>=1);
  _processor_coor.resize(_ndimension);
  
  // Require 1^N processor grid for fake
--- a/Grid/communicator/SharedMemory.h
+++ b/Grid/communicator/SharedMemory.h
@@ -90,7 +90,9 @@ public:
  // Create an optimal reordered communicator that makes MPI_Cart_create get it right
  //////////////////////////////////////////////////////////////////////////////////////
  static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD
-  static void OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicator            (const Coordinate &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicatorHypercube   (const Coordinate &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
+  static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm);  // Turns MPI_COMM_WORLD into right layout for Cartesian
  ///////////////////////////////////////////////////
  // Provide shared memory facilities off comm world
  ///////////////////////////////////////////////////
--- a/Grid/communicator/SharedMemoryMPI.cc
+++ b/Grid/communicator/SharedMemoryMPI.cc
@@ -141,8 +141,22 @@ int Log2Size(int TwoToPower,int MAXLOG2)
 }
 void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
 {
-#ifdef HYPERCUBE
-#warning "HPE 8600 Hypercube optimisations enabled"
+  //////////////////////////////////////////////////////////////////////////////
+  // Look and see if it looks like an HPE 8600 based on hostname conventions
+  //////////////////////////////////////////////////////////////////////////////
+  const int namelen = _POSIX_HOST_NAME_MAX;
+  char name[namelen];
+  int R;
+  int I;
+  int N;
+  gethostname(name,namelen);
+  int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ;
+
+  if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm);
+  else         OptimalCommunicatorSharedMemory(processors,optimal_comm);
+}
+void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
+{
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
@@ -208,7 +222,8 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
  ////////////////////////////////////////////////////////////////
  int ndimension              = processors.size();
  std::vector<int> processor_coor(ndimension);
-  std::vector<int> WorldDims = processors;   std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
+  std::vector<int> WorldDims = processors.toVector();
+  std::vector<int> ShmDims  (ndimension,1);  std::vector<int> NodeDims (ndimension);
  std::vector<int> ShmCoor  (ndimension);    std::vector<int> NodeCoor (ndimension);    std::vector<int> WorldCoor(ndimension);
  std::vector<int> HyperCoor(ndimension);
  int dim = 0;
@@ -263,7 +278,9 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
  /////////////////////////////////////////////////////////////////
  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
  assert(ierr==0);
-#else 
+}
+void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm)
+{
  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
@@ -316,7 +333,6 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
  /////////////////////////////////////////////////////////////////
  int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm);
  assert(ierr==0);
-#endif
 }
 ////////////////////////////////////////////////////////////////////////////////////////////
 // SHMGET
@@ -347,7 +363,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
        int errsv = errno;
        printf("Errno %d\n",errsv);
        printf("key   %d\n",key);
-        printf("size  %lld\n",size);
+        printf("size  %ld\n",size);
        printf("flags %d\n",flags);
        perror("shmget");
        exit(1);