// Mirror of https://github.com/paboyle/Grid.git
// (synced 2025-04-03 18:55:56 +01:00; 325 lines, 9.0 KiB, C++)
#include <Grid/GridCore.h>
|
|
|
|
NAMESPACE_BEGIN(Grid);
|
|
|
|
/*Allocation types, saying which pointer cache should be used*/
|
|
#define Cpu (0)
|
|
#define CpuHuge (1)
|
|
#define CpuSmall (2)
|
|
#define Acc (3)
|
|
#define AccHuge (4)
|
|
#define AccSmall (5)
|
|
#define Shared (6)
|
|
#define SharedHuge (7)
|
|
#define SharedSmall (8)
|
|
#undef GRID_MM_VERBOSE
|
|
uint64_t total_shared;
|
|
uint64_t total_device;
|
|
uint64_t total_host;;
|
|
void MemoryManager::PrintBytes(void)
|
|
{
|
|
std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
|
|
std::cout << " MemoryManager : PrintBytes "<<std::endl;
|
|
std::cout << " MemoryManager : ------------------------------------ "<<std::endl;
|
|
std::cout << " MemoryManager : "<<(total_shared>>20)<<" shared Mbytes "<<std::endl;
|
|
std::cout << " MemoryManager : "<<(total_device>>20)<<" accelerator Mbytes "<<std::endl;
|
|
std::cout << " MemoryManager : "<<(total_host>>20) <<" cpu Mbytes "<<std::endl;
|
|
uint64_t cacheBytes;
|
|
cacheBytes = CacheBytes[Cpu];
|
|
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" cpu cache Mbytes "<<std::endl;
|
|
cacheBytes = CacheBytes[Acc];
|
|
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" acc cache Mbytes "<<std::endl;
|
|
cacheBytes = CacheBytes[Shared];
|
|
std::cout << " MemoryManager : "<<(cacheBytes>>20) <<" shared cache Mbytes "<<std::endl;
|
|
|
|
#ifdef GRID_CUDA
|
|
cuda_mem();
|
|
#endif
|
|
|
|
}
|
|
|
|
// Bytes currently held across the three accelerator caches (Acc, AccHuge, AccSmall)
uint64_t MemoryManager::DeviceCacheBytes()
{
  uint64_t held = CacheBytes[Acc];
  held += CacheBytes[AccHuge];
  held += CacheBytes[AccSmall];
  return held;
}
|
|
// Bytes currently held across the three host caches (Cpu, CpuHuge, CpuSmall)
uint64_t MemoryManager::HostCacheBytes()
{
  uint64_t held = CacheBytes[Cpu];
  held += CacheBytes[CpuHuge];
  held += CacheBytes[CpuSmall];
  return held;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
// Data tables for recently freed pointer caches
|
|
//////////////////////////////////////////////////////////////////////
|
|
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
|
int MemoryManager::Victim[MemoryManager::NallocType];
|
|
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 0, 8, 8, 0, 16, 8, 0, 16 };
|
|
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
|
|
//////////////////////////////////////////////////////////////////////
|
|
// Actual allocation and deallocation utils
|
|
//////////////////////////////////////////////////////////////////////
|
|
void *MemoryManager::AcceleratorAllocate(size_t bytes)
|
|
{
|
|
total_device+=bytes;
|
|
void *ptr = (void *) Lookup(bytes,Acc);
|
|
if ( ptr == (void *) NULL ) {
|
|
ptr = (void *) acceleratorAllocDevice(bytes);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"AcceleratorAllocate "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
return ptr;
|
|
}
|
|
void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
|
|
{
|
|
total_device-=bytes;
|
|
void *__freeme = Insert(ptr,bytes,Acc);
|
|
if ( __freeme ) {
|
|
acceleratorFreeDevice(__freeme);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"AcceleratorFree "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
}
|
|
void *MemoryManager::SharedAllocate(size_t bytes)
|
|
{
|
|
total_shared+=bytes;
|
|
void *ptr = (void *) Lookup(bytes,Shared);
|
|
if ( ptr == (void *) NULL ) {
|
|
ptr = (void *) acceleratorAllocShared(bytes);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"SharedAllocate "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
return ptr;
|
|
}
|
|
void MemoryManager::SharedFree (void *ptr,size_t bytes)
|
|
{
|
|
total_shared-=bytes;
|
|
void *__freeme = Insert(ptr,bytes,Shared);
|
|
if ( __freeme ) {
|
|
acceleratorFreeShared(__freeme);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"SharedFree "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
}
|
|
#ifdef GRID_UVM
|
|
void *MemoryManager::CpuAllocate(size_t bytes)
|
|
{
|
|
total_host+=bytes;
|
|
void *ptr = (void *) Lookup(bytes,Cpu);
|
|
if ( ptr == (void *) NULL ) {
|
|
ptr = (void *) acceleratorAllocShared(bytes);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"CpuAllocate "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
return ptr;
|
|
}
|
|
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
|
{
|
|
total_host-=bytes;
|
|
NotifyDeletion(_ptr);
|
|
void *__freeme = Insert(_ptr,bytes,Cpu);
|
|
if ( __freeme ) {
|
|
acceleratorFreeShared(__freeme);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"CpuFree "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
}
|
|
#else
|
|
void *MemoryManager::CpuAllocate(size_t bytes)
|
|
{
|
|
total_host+=bytes;
|
|
void *ptr = (void *) Lookup(bytes,Cpu);
|
|
if ( ptr == (void *) NULL ) {
|
|
ptr = (void *) acceleratorAllocCpu(bytes);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"CpuAllocate "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
return ptr;
|
|
}
|
|
void MemoryManager::CpuFree (void *_ptr,size_t bytes)
|
|
{
|
|
total_host-=bytes;
|
|
NotifyDeletion(_ptr);
|
|
void *__freeme = Insert(_ptr,bytes,Cpu);
|
|
if ( __freeme ) {
|
|
acceleratorFreeCpu(__freeme);
|
|
}
|
|
#ifdef GRID_MM_VERBOSE
|
|
std::cout <<"CpuFree "<<std::endl;
|
|
PrintBytes();
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
//////////////////////////////////////////
|
|
// call only once
|
|
//////////////////////////////////////////
|
|
void MemoryManager::Init(void)
|
|
{
|
|
|
|
char * str;
|
|
int Nc;
|
|
|
|
str= getenv("GRID_ALLOC_NCACHE_LARGE");
|
|
if ( str ) {
|
|
Nc = atoi(str);
|
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
|
Ncache[Cpu]=Nc;
|
|
Ncache[Acc]=Nc;
|
|
Ncache[Shared]=Nc;
|
|
}
|
|
}
|
|
|
|
str= getenv("GRID_ALLOC_NCACHE_HUGE");
|
|
if ( str ) {
|
|
Nc = atoi(str);
|
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
|
Ncache[CpuHuge]=Nc;
|
|
Ncache[AccHuge]=Nc;
|
|
Ncache[SharedHuge]=Nc;
|
|
}
|
|
}
|
|
|
|
str= getenv("GRID_ALLOC_NCACHE_SMALL");
|
|
if ( str ) {
|
|
Nc = atoi(str);
|
|
if ( (Nc>=0) && (Nc < NallocCacheMax)) {
|
|
Ncache[CpuSmall]=Nc;
|
|
Ncache[AccSmall]=Nc;
|
|
Ncache[SharedSmall]=Nc;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// Log the memory manager configuration: the DeviceMaxBytes figure (non-UVM
// builds only), the depth of each allocation cache when ALLOCATION_CACHE is
// defined, and which memory model / backend allocator this build uses.
void MemoryManager::InitMessage(void) {

#ifndef GRID_UVM
  std::cout << GridLogMessage
            << "MemoryManager Cache "
            << MemoryManager::DeviceMaxBytes
            << " bytes "
            << std::endl;
#endif

  std::cout << GridLogMessage
            << "MemoryManager::Init() setting up"
            << std::endl;

#ifdef ALLOCATION_CACHE
  // One line per memory space, listing the small / large / huge cache depths
  std::cout << GridLogMessage
            << "MemoryManager::Init() cache pool for recent host allocations: SMALL "
            << Ncache[CpuSmall]
            << " LARGE " << Ncache[Cpu]
            << " HUGE "  << Ncache[CpuHuge]
            << std::endl;
  std::cout << GridLogMessage
            << "MemoryManager::Init() cache pool for recent device allocations: SMALL "
            << Ncache[AccSmall]
            << " LARGE " << Ncache[Acc]
            << " Huge "  << Ncache[AccHuge]
            << std::endl;
  std::cout << GridLogMessage
            << "MemoryManager::Init() cache pool for recent shared allocations: SMALL "
            << Ncache[SharedSmall]
            << " LARGE " << Ncache[Shared]
            << " Huge "  << Ncache[SharedHuge]
            << std::endl;
#endif

#ifdef GRID_UVM
  // Unified memory build
  std::cout << GridLogMessage
            << "MemoryManager::Init() Unified memory space"
            << std::endl;
#  ifdef GRID_CUDA
  std::cout << GridLogMessage
            << "MemoryManager::Init() Using cudaMallocManaged"
            << std::endl;
#  endif
#  ifdef GRID_HIP
  std::cout << GridLogMessage
            << "MemoryManager::Init() Using hipMallocManaged"
            << std::endl;
#  endif
#  ifdef GRID_SYCL
  std::cout << GridLogMessage
            << "MemoryManager::Init() Using SYCL malloc_shared"
            << std::endl;
#  endif
#else
  // Non-unified build
  std::cout << GridLogMessage
            << "MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory"
            << std::endl;
#  ifdef GRID_CUDA
  std::cout << GridLogMessage
            << "MemoryManager::Init() Using cudaMalloc"
            << std::endl;
#  endif
#  ifdef GRID_HIP
  std::cout << GridLogMessage
            << "MemoryManager::Init() Using hipMalloc"
            << std::endl;
#  endif
#  ifdef GRID_SYCL
  std::cout << GridLogMessage
            << "MemoryManager::Init() Using SYCL malloc_device"
            << std::endl;
#  endif
#endif

}
|
|
|
|
// Offer a freed pointer to the cache family `type` (Cpu, Acc or Shared).
// The size picks the member of the family: below GRID_ALLOC_SMALL_LIMIT the
// Small cache (type+2), at or above GRID_ALLOC_HUGE_LIMIT the Huge cache
// (type+1), otherwise the base cache. Returns the pointer the caller must
// really free, or NULL when nothing needs freeing. Without ALLOCATION_CACHE
// nothing is cached and `ptr` itself is returned.
void *MemoryManager::Insert(void *ptr,size_t bytes,int type)
{
#ifdef ALLOCATION_CACHE
  int pool = type;
  if ( bytes < GRID_ALLOC_SMALL_LIMIT ) {
    pool = type + 2;
  } else if ( bytes >= GRID_ALLOC_HUGE_LIMIT ) {
    pool = type + 1;
  }

  return Insert(ptr, bytes, Entries[pool], Ncache[pool], Victim[pool], CacheBytes[pool]);
#else
  return ptr;
#endif
}
|
|
|
|
void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim, uint64_t &cacheBytes)
|
|
{
|
|
#ifdef GRID_OMP
|
|
assert(omp_in_parallel()==0);
|
|
#endif
|
|
|
|
if (ncache == 0) return ptr;
|
|
|
|
void * ret = NULL;
|
|
int v = -1;
|
|
|
|
for(int e=0;e<ncache;e++) {
|
|
if ( entries[e].valid==0 ) {
|
|
v=e;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( v==-1 ) {
|
|
v=victim;
|
|
victim = (victim+1)%ncache;
|
|
}
|
|
|
|
if ( entries[v].valid ) {
|
|
ret = entries[v].address;
|
|
cacheBytes -= entries[v].bytes;
|
|
entries[v].valid = 0;
|
|
entries[v].address = NULL;
|
|
entries[v].bytes = 0;
|
|
}
|
|
|
|
entries[v].address=ptr;
|
|
entries[v].bytes =bytes;
|
|
entries[v].valid =1;
|
|
cacheBytes += bytes;
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Ask the cache family `type` (Cpu, Acc or Shared) for a pointer of exactly
// `bytes` bytes. The size picks the member of the family: below
// GRID_ALLOC_SMALL_LIMIT the Small cache (type+2), at or above
// GRID_ALLOC_HUGE_LIMIT the Huge cache (type+1), otherwise the base cache.
// Returns NULL on a miss, and always when ALLOCATION_CACHE is not defined.
void *MemoryManager::Lookup(size_t bytes,int type)
{
#ifdef ALLOCATION_CACHE
  int pool = type;
  if ( bytes < GRID_ALLOC_SMALL_LIMIT ) {
    pool = type + 2;
  } else if ( bytes >= GRID_ALLOC_HUGE_LIMIT ) {
    pool = type + 1;
  }

  return Lookup(bytes, Entries[pool], Ncache[pool], CacheBytes[pool]);
#else
  return NULL;
#endif
}
|
|
|
|
void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache,uint64_t & cacheBytes)
|
|
{
|
|
#ifdef GRID_OMP
|
|
assert(omp_in_parallel()==0);
|
|
#endif
|
|
for(int e=0;e<ncache;e++){
|
|
if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
|
|
entries[e].valid = 0;
|
|
cacheBytes -= entries[e].bytes;
|
|
return entries[e].address;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
NAMESPACE_END(Grid);
|
|
|