Mirror of https://github.com/paboyle/Grid.git, synced 2025-06-19 00:07:05 +01:00

merge upstream develop

Author: nmeyer-ur
Date: 2020-07-07 20:26:47 +02:00
326 changed files with 10335 additions and 9381 deletions


@@ -52,14 +52,14 @@ public:
accelerator_inline size_type size(void) const { return _size; };
accelerator_inline void clear(void) { resize(0);}
accelerator_inline void resize(size_type sz) {
#ifndef GRID_HIP
assert(sz>=0);
assert(sz<=MaxEntries);
#endif
_size = sz;
}
accelerator_inline void resize(size_type sz,const value &val) {
assert(sz>=0);
assert(sz<=MaxEntries);
_size = sz;
resize(sz);
for(int s=0;s<sz;s++) _data[s]=val;
}
accelerator_inline pointer begin(void) { return &_data[0]; }
@@ -67,7 +67,7 @@ public:
accelerator_inline pointer end (void) { return &_data[_size]; }
accelerator_inline const_pointer end (void) const { return &_data[_size]; }
accelerator_inline void push_back(const value &val) { resize(_size+1); _data[_size-1] = val;}
accelerator_inline AcceleratorVector() { _size = 0; }
accelerator_inline AcceleratorVector() { resize(0); }
accelerator_inline AcceleratorVector(size_type sz) { resize(sz); }
accelerator_inline AcceleratorVector(size_type sz,const value &val) { resize(sz,val); }
AcceleratorVector(const std::vector<value> &copyme) {

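The point of the first file's hunks is that bounds checking now lives in one place: the two-argument resize() and the default constructor both delegate to the single-argument resize(), whose asserts are compiled out under GRID_HIP. A minimal stand-alone sketch of that pattern, under simplifying assumptions (no accelerator_inline qualifiers, no GRID_HIP guard, and a made-up name MiniAcceleratorVector rather than the real class):

#include <cassert>
#include <cstddef>

// Sketch only: MaxEntries is a template parameter as in the real class,
// but everything else here is simplified for illustration.
template<class T, std::size_t MaxEntries>
class MiniAcceleratorVector {
  T _data[MaxEntries];
  std::size_t _size;
public:
  MiniAcceleratorVector() { resize(0); }            // constructor delegates too
  void resize(std::size_t sz) {
    assert(sz <= MaxEntries);                       // single bounds check
    _size = sz;
  }
  void resize(std::size_t sz, const T &val) {
    resize(sz);                                     // reuse instead of duplicating the check
    for (std::size_t s = 0; s < sz; s++) _data[s] = val;
  }
  std::size_t size() const { return _size; }
  T &operator[](std::size_t i) { return _data[i]; }
};

int main() {
  MiniAcceleratorVector<int, 8> v;
  v.resize(4, 7);                                   // four entries, all set to 7
  assert(v.size() == 4 && v[3] == 7);
  return 0;
}

Keeping the MaxEntries check (and any platform-specific guard around it) in one overload means a future change to the bounds policy only touches a single function.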

@@ -73,8 +73,6 @@ feenableexcept (unsigned int excepts)
}
#endif
uint32_t gpu_threads=8;
NAMESPACE_BEGIN(Grid);
//////////////////////////////////////////////////////
@@ -192,16 +190,12 @@ void GridParseLayout(char **argv,int argc,
assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]);
}
if( GridCmdOptionExists(argv,argv+argc,"--gpu-threads") ){
if( GridCmdOptionExists(argv,argv+argc,"--accelerator-threads") ){
std::vector<int> gputhreads(0);
#ifndef GRID_NVCC
std::cout << GridLogWarning << "'--gpu-threads' option used but Grid was"
<< " not compiled with GPU support" << std::endl;
#endif
arg= GridCmdOptionPayload(argv,argv+argc,"--gpu-threads");
arg= GridCmdOptionPayload(argv,argv+argc,"--accelerator-threads");
GridCmdOptionIntVector(arg,gputhreads);
assert(gputhreads.size()==1);
gpu_threads=gputhreads[0];
acceleratorThreads(gputhreads[0]);
}
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
@@ -241,8 +235,6 @@ static int Grid_is_initialised;
/////////////////////////////////////////////////////////
void GridBanner(void)
{
static int printed =0;
if( !printed ) {
std::cout <<std::endl;
std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
@@ -278,67 +270,6 @@ void GridBanner(void)
std::cout << "Build " << GRID_BUILD_STR(GRID_BUILD_REF) << std::endl;
#endif
std::cout << std::endl;
printed=1;
}
}
#ifdef GRID_NVCC
cudaDeviceProp *gpu_props;
#endif
void GridGpuInit(void)
{
#ifdef GRID_NVCC
int nDevices = 1;
cudaGetDeviceCount(&nDevices);
gpu_props = new cudaDeviceProp[nDevices];
char * localRankStr = NULL;
int rank = 0, world_rank=0;
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"
#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK"
#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK"
#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK"
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
{
rank = atoi(localRankStr);
}
if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL)
{
rank = atoi(localRankStr);
}
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
if ( world_rank == 0 ) {
GridBanner();
}
for (int i = 0; i < nDevices; i++) {
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("GpuInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
cudaGetDeviceProperties(&gpu_props[i], i);
if ( world_rank == 0) {
cudaDeviceProp prop;
prop = gpu_props[i];
printf("GpuInit: ========================\n");
printf("GpuInit: Device Number : %d\n", i);
printf("GpuInit: ========================\n");
printf("GpuInit: Device identifier: %s\n", prop.name);
GPU_PROP(managedMemory);
GPU_PROP(isMultiGpuBoard);
GPU_PROP(warpSize);
// GPU_PROP(unifiedAddressing);
// GPU_PROP(l2CacheSize);
// GPU_PROP(singleToDoublePrecisionPerfRatio);
}
}
if ( world_rank == 0 ) {
printf("GpuInit: ================================================\n");
}
#endif
}
void Grid_init(int *argc,char ***argv)
@@ -353,7 +284,7 @@ void Grid_init(int *argc,char ***argv)
//////////////////////////////////////////////////////////
// Early intialisation necessities without rank knowledge
//////////////////////////////////////////////////////////
GridGpuInit(); // Must come first to set device prior to MPI init
acceleratorInit(); // Must come first to set device prior to MPI init due to Omnipath Driver
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){
int MB;
@@ -363,6 +294,14 @@ void Grid_init(int *argc,char ***argv)
GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--device-mem") ){
int MB;
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--device-mem");
GridCmdOptionInt(arg,MB);
uint64_t MB64 = MB;
MemoryManager::DeviceMaxBytes = MB64*1024LL*1024LL;
}
if( GridCmdOptionExists(*argv,*argv+*argc,"--hypercube") ){
int enable;
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--hypercube");
@@ -379,6 +318,11 @@ void Grid_init(int *argc,char ***argv)
Grid_debug_handler_init();
}
//////////////////////////////////////////////////////////
// Memory manager
//////////////////////////////////////////////////////////
MemoryManager::Init();
//////////////////////////////////////////////////////////
// MPI initialisation
//////////////////////////////////////////////////////////
@@ -417,11 +361,18 @@ void Grid_init(int *argc,char ***argv)
std::cout << GridLogMessage << "MPI is initialised and logging filters activated "<<std::endl;
std::cout << GridLogMessage << "================================================ "<<std::endl;
/////////////////////////////////////////////////////////
// Reporting
/////////////////////////////////////////////////////////
std::cout << GridLogMessage << "Requested "<< GlobalSharedMemory::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
if ( GlobalSharedMemory::Hugepages) {
std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
}
#ifndef GRID_UVM
std::cout << GridLogMessage << "MemoryManager Cache "<< MemoryManager::DeviceMaxBytes <<" bytes "<<std::endl;
#endif
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-mem") ){
MemoryProfiler::debug = true;

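The Init.cc hunks above add a --device-mem flag whose megabyte payload ends up in MemoryManager::DeviceMaxBytes, following the same pattern as --shm: read the payload as an integer number of megabytes, widen it to 64 bits, then scale to bytes. The following self-contained sketch shows that flow; OptionExists and OptionPayload are simplified stand-ins for Grid's GridCmdOptionExists and GridCmdOptionPayload helpers, so treat it as an illustration rather than the library code.

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>

// Simplified stand-ins for the command-line helpers used in the diff.
static bool OptionExists(char **beg, char **end, const std::string &opt) {
  return std::find(beg, end, opt) != end;
}
static std::string OptionPayload(char **beg, char **end, const std::string &opt) {
  char **it = std::find(beg, end, opt);
  if (it != end && ++it != end) return std::string(*it);
  return std::string();
}

int main(int argc, char **argv) {
  std::uint64_t DeviceMaxBytes = 0;                 // stand-in for MemoryManager::DeviceMaxBytes
  if (OptionExists(argv, argv + argc, "--device-mem")) {
    std::string arg = OptionPayload(argv, argv + argc, "--device-mem");
    int MB = std::atoi(arg.c_str());
    std::uint64_t MB64 = MB;                        // widen before multiplying, as in the hunk
    DeviceMaxBytes = MB64 * 1024LL * 1024LL;        // megabytes -> bytes
  }
  std::cout << "Device cache limit: " << DeviceMaxBytes << " bytes" << std::endl;
  return 0;
}

Widening to 64 bits before the multiply avoids overflowing a 32-bit int for limits above 2048 MB, which is why the hunk stores the payload in MB64 first.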

@@ -56,6 +56,7 @@ std::string GridCmdVectorIntToString(const VectorInt & vec);
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
template<class VectorInt>
void GridCmdOptionIntVector(std::string &str,VectorInt & vec);
void GridCmdOptionInt(std::string &str,int & val);
void GridParseLayout(char **argv,int argc,
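The header hunk above only adds the declaration of GridCmdOptionInt, which the --device-mem handling in Init.cc relies on. A plausible definition, offered purely as an assumption about how a string payload becomes an int (not the actual Init.cc body), together with a tiny usage check:

#include <cassert>
#include <sstream>
#include <string>

// Assumed definition: stream the payload string into the integer slot.
void GridCmdOptionInt(std::string &str, int &val) {
  std::stringstream ss(str);
  ss >> val;
}

int main() {
  std::string payload = "4096";     // e.g. the payload of "--device-mem 4096"
  int MB = 0;
  GridCmdOptionInt(payload, MB);
  assert(MB == 4096);
  return 0;
}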