Use a much bigger pointer cache on Nvidia builds because setting up UVM allocations is costly

2025-07-19 06:07:06 +01:00 · 2020-01-27 12:41:16 -05:00
parent 7c061e20c9
commit afc7426f39
2 changed files with 16 additions and 5 deletions
--- a/Grid/allocator/AlignedAllocator.cc
+++ b/Grid/allocator/AlignedAllocator.cc
@@ -6,6 +6,12 @@ NAMESPACE_BEGIN(Grid);
 MemoryStats *MemoryProfiler::stats = nullptr;
 bool         MemoryProfiler::debug = false;

+#ifdef GRID_NVCC
+#define SMALL_LIMIT (0)    // NVCC build: no size threshold, so the `bytes < SMALL_LIMIT` early-outs in PointerCache::Insert/Lookup never fire
+#else
+#define SMALL_LIMIT (4096) // otherwise: requests under 4096 bytes skip the pointer cache (same value as the previously hard-coded threshold)
+#endif
+
 #ifdef POINTER_CACHE
 int PointerCache::victim;

@@ -13,7 +19,7 @@ PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];

 void *PointerCache::Insert(void *ptr,size_t bytes) {

-  if (bytes < 4096 ) return ptr;
+  if (bytes < SMALL_LIMIT ) return ptr;

 #ifdef GRID_OMP
  assert(omp_in_parallel()==0);
@@ -50,7 +56,7 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {

 void *PointerCache::Lookup(size_t bytes) {

-  if (bytes < 4096 ) return NULL;
+  if (bytes < SMALL_LIMIT ) return NULL;

 #ifdef GRID_OMP
  assert(omp_in_parallel()==0);
--- a/Grid/allocator/AlignedAllocator.h
+++ b/Grid/allocator/AlignedAllocator.h
@@ -49,8 +49,13 @@ NAMESPACE_BEGIN(Grid);
 #ifdef POINTER_CACHE
 class PointerCache {
 private:
-
+/*Pinning pages is costly*/
+/*Could maintain separate large and small allocation caches*/
+#ifdef GRID_NVCC 
+  static const int Ncache=128;
+#else
  static const int Ncache=8;
+#endif
  static int victim;

  typedef struct { 
@@ -63,7 +68,6 @@ private:

 public:

-
  static void *Insert(void *ptr,size_t bytes) ;
  static void *Lookup(size_t bytes) ;

@@ -170,13 +174,14 @@ public:
    // Unified (managed) memory
    ////////////////////////////////////
    if ( ptr == (_Tp *) NULL ) {
+      //      printf(" alignedAllocater cache miss %ld bytes ",bytes);      BACKTRACEFP(stdout);
      auto err = cudaMallocManaged((void **)&ptr,bytes);
      if( err != cudaSuccess ) {
 	ptr = (_Tp *) NULL;
 	std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
 	assert(0);
      }
-    }
+    } 
    assert( ptr != (_Tp *)NULL);
 #else 
    //////////////////////////////////////////////////////////////////////////////////////////