Allocator cache split into large/small pools

2025-07-22 15:47:06 +01:00 · 2020-05-10 05:24:26 -04:00
parent 2bb2c68e15
commit ea08f193e7
4 changed files with 15 additions and 5 deletions
--- a/Grid/algorithms/approx/Chebyshev.h
+++ b/Grid/algorithms/approx/Chebyshev.h
@@ -236,7 +236,6 @@ public:

    int vol=grid->gSites();
    typedef typename Field::vector_type vector_type;
-    constexpr int Nsimd = vector_type::Nsimd();

    Field T0(grid); T0 = in;  
    Field T1(grid); 
@@ -264,6 +263,7 @@ public:
      auto Tn_v = Tn->View();
      auto Tnp_v = Tnp->View();
      auto Tnm_v = Tnm->View();
+      constexpr int Nsimd = vector_type::Nsimd();
      accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
 	  coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
 	  coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
--- a/Grid/allocator/AlignedAllocator.cc
+++ b/Grid/allocator/AlignedAllocator.cc
@@ -7,7 +7,11 @@ MemoryStats *MemoryProfiler::stats = nullptr;
 bool         MemoryProfiler::debug = false;

 int PointerCache::NcacheSmall = PointerCache::NcacheSmallMax;
-int PointerCache::Ncache      = PointerCache::NcacheMax;
+#ifdef GRID_CUDA
+int PointerCache::Ncache      = 32;
+#else 
+int PointerCache::Ncache      = 8;
+#endif
 int PointerCache::Victim;
 int PointerCache::VictimSmall;
 PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::NcacheMax];
@@ -16,12 +20,16 @@ PointerCache::PointerCacheEntry PointerCache::EntriesSmall[PointerCache::NcacheS
 void PointerCache::Init(void)
 {
  char * str;
+
  str= getenv("GRID_ALLOC_NCACHE_LARGE");
  if ( str ) Ncache = atoi(str);
  if ( (Ncache<0) || (Ncache > NcacheMax)) Ncache = NcacheMax;
+
  str= getenv("GRID_ALLOC_NCACHE_SMALL");
  if ( str ) NcacheSmall = atoi(str);
  if ( (NcacheSmall<0) || (NcacheSmall > NcacheSmallMax)) NcacheSmall = NcacheSmallMax;
+
+  //  printf("Aligned allocator cache: large %d/%d small %d/%d\n",Ncache,NcacheMax,NcacheSmall,NcacheSmallMax);
 }
 void *PointerCache::Insert(void *ptr,size_t bytes) 
 {
--- a/Grid/communicator/SharedMemory.cc
+++ b/Grid/communicator/SharedMemory.cc
@@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){
  if (heap_bytes >= heap_size) {
    std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
    std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
-    std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
+    std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
+    std::cout<< " Current bytes is " << (heap_bytes/(1024*1024)) <<"MB"<<std::endl;
+    std::cout<< " Current heap  is " << (heap_size/(1024*1024)) <<"MB"<<std::endl;
    assert(heap_bytes<heap_size);
  }
  //std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;
--- a/benchmarks/Benchmark_schur.cc
+++ b/benchmarks/Benchmark_schur.cc
@@ -47,7 +47,7 @@ int main (int argc, char ** argv)

  const int Ls=12;
  std::vector< std::vector<int> > latts;
-#if 0
+#if 1
  latts.push_back(std::vector<int> ({24,24,24,24}) );
  latts.push_back(std::vector<int> ({48,24,24,24}) );
  latts.push_back(std::vector<int> ({96,24,24,24}) );
@@ -157,7 +157,7 @@ void benchDw(std::vector<int> & latt4, int Ls)
    std::cout <<"\t"<<flops/(t1-t0)<<"\t"<<(t1-t0)/1000./1000.<<" s\t";

    // Cheby uses MpcDagMpc so 2x flops
-    for(int i=0;i<100;i++){
+    for(int i=0;i<1;i++){
    Cheby(Mpc,src_o,r_o);
    t0=usecond();
    Cheby(Mpc,src_o,r_o);