Printing and device memory size detection

2025-07-27 17:57:08 +01:00 · 2020-06-04 14:58:03 -04:00
parent 0d95805cde
commit f39c2a240b
4 changed files with 19 additions and 4 deletions
--- a/Grid/allocator/MemoryManager.h
+++ b/Grid/allocator/MemoryManager.h
@@ -101,9 +101,6 @@ private:
  static void *CpuAllocate(size_t bytes);
  static void  CpuFree    (void *ptr,size_t bytes);

-
- private:
-
  ////////////////////////////////////////////////////////
  // Footprint tracking
  ////////////////////////////////////////////////////////
@@ -115,6 +112,7 @@ private:
  static uint64_t     HostToDeviceXfer;
  static uint64_t     DeviceToHostXfer;
 
+ private:
 #ifndef GRID_UVM
  //////////////////////////////////////////////////////////////////////
  // Data tables for ViewCache
--- a/Grid/allocator/MemoryManagerCache.cc
+++ b/Grid/allocator/MemoryManagerCache.cc
@@ -439,7 +439,7 @@ void  MemoryManager::Print(void)
  std::cout << GridLogDebug << DeviceToHostXfer << " transfers        from device " << std::endl;
  std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to   device " << std::endl;
  std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl;
-  std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << std::endl;
+  std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl;
  std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
  std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
  std::cout << GridLogDebug << "--------------------------------------------" << std::endl;
--- a/Grid/threads/Accelerator.cc
+++ b/Grid/threads/Accelerator.cc
@@ -31,6 +31,7 @@ void acceleratorInit(void)
  if ((localRankStr = getenv(ENV_RANK_OMPI   )) != NULL) { world_rank = atoi(localRankStr);}
  if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}

+  size_t totalDeviceMem=0;
  for (int i = 0; i < nDevices; i++) {

 #define GPU_PROP_FMT(canMapHostMemory,FMT)     printf("AcceleratorCudaInit:   " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
@@ -45,14 +46,18 @@ void acceleratorInit(void)
      printf("AcceleratorCudaInit: ========================\n");
      printf("AcceleratorCudaInit: Device identifier: %s\n", prop.name);

+      GPU_PROP_FMT(totalGlobalMem,"%lld");
      GPU_PROP(managedMemory);
      GPU_PROP(isMultiGpuBoard);
      GPU_PROP(warpSize);
+      totalDeviceMem = prop.totalGlobalMem;
      //      GPU_PROP(unifiedAddressing);
      //      GPU_PROP(l2CacheSize);
      //      GPU_PROP(singleToDoublePrecisionPerfRatio);
    }
  }
+  MemoryManager::DeviceMaxBytes = (8*totalDeviceMem)/10; // Assume 80% ours
+
 #ifdef GRID_IBM_SUMMIT
  // IBM Jsrun makes cuda Device numbering screwy and not match rank
  if ( world_rank == 0 )  printf("AcceleratorCudaInit: IBM Summit or similar - NOT setting device to node rank\n");
--- a/Grid/util/Init.cc
+++ b/Grid/util/Init.cc
@@ -296,6 +296,14 @@ void Grid_init(int *argc,char ***argv)
    GlobalSharedMemory::MAX_MPI_SHM_BYTES = MB64*1024LL*1024LL;
  }

+  if( GridCmdOptionExists(*argv,*argv+*argc,"--device-mem") ){
+    int GB;
+    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--device-mem");
+    GridCmdOptionInt(arg,GB);
+    uint64_t GB64 = GB;
+    MemoryManager::DeviceMaxBytes = GB64*1024LL*1024LL*1024LL;
+  }
+
  if( GridCmdOptionExists(*argv,*argv+*argc,"--hypercube") ){
    int enable;
    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--hypercube");
@@ -355,6 +363,10 @@ void Grid_init(int *argc,char ***argv)
    std::cout << GridLogMessage << "Mapped stencil comms buffers as MAP_HUGETLB "<<std::endl;
  }

+#ifndef GRID_UVM
+  std::cout << GridLogMessage << "MemoryManager Cache "<< MemoryManager::DeviceMaxBytes <<" bytes "<<std::endl;
+#endif
+
  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-mem") ){
    MemoryProfiler::debug = true;
    MemoryProfiler::stats = &dbgMemStats;