1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

SlabAllocator

This commit is contained in:
Christoph Lehner 2020-09-13 05:56:02 -04:00
parent 51d1beb1f3
commit 01652d8cfe
2 changed files with 163 additions and 11 deletions

View File

@ -153,18 +153,7 @@ inline void *acceleratorAllocShared(size_t bytes)
}
return ptr;
};
inline void *acceleratorAllocDevice(size_t bytes)
{
void *ptr=NULL;
auto err = cudaMalloc((void **)&ptr,bytes);
if( err != cudaSuccess ) {
ptr = (void *) NULL;
printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
}
return ptr;
};
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
inline int acceleratorIsCommunicable(void *ptr)
@ -176,6 +165,8 @@ inline int acceleratorIsCommunicable(void *ptr)
if(uvm) return 0;
else return 1;
}
void *acceleratorAllocDevice(size_t bytes);
void acceleratorFreeDevice(void* ptr);
#endif

View File

@ -0,0 +1,161 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/threads/SlabAllocator.cc
Copyright (C) 2020
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <unordered_set>
NAMESPACE_BEGIN(Grid);
#ifdef GRID_CUDA
#define GRID_DEVICE_HEAP_SLAB_THRESHOLD (1024*1024)
#define GRID_DEVICE_HEAP_SLAB_SIZE (2*1024*1024)
void *acceleratorAllocDeviceCUDA(size_t bytes) {
void *ptr=NULL;
auto err = cudaMalloc((void **)&ptr,bytes);
if( err != cudaSuccess ) {
ptr = (void *) NULL;
printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
}
return ptr;
}
void acceleratorFreeDeviceCUDA(void *ptr) {
cudaFree(ptr);
}
struct grid_device_heap_slab_t {
void* Ptr;
size_t ElementSize;
size_t Elements;
std::unordered_set<uint32_t> Allocated;
std::unordered_set<uint32_t> Available;
};
std::unordered_map<void*, grid_device_heap_slab_t*> DeviceHeapPtrTable;
std::unordered_map<size_t, std::unordered_set<grid_device_heap_slab_t*> > DeviceHeapSlabTable;
void* SlabAllocateElement(grid_device_heap_slab_t* slab) {
assert(!slab->Available.empty());
auto available = slab->Available.begin();
auto slot = *available;
slab->Allocated.insert(slot);
slab->Available.erase(available);
void* Ptr = (void*)((char*)slab->Ptr + slot * slab->ElementSize);
DeviceHeapPtrTable[Ptr] = slab;
//std::cout << "Allocate element " << slot << " of slab " << slab << " of size " << slab->ElementSize << " with elements " << slab->Elements <<
// " (allocated = " << slab->Allocated.size() << ", available = " << slab->Available.size() << ")" << std::endl;
return Ptr;
}
void SlabRemove(grid_device_heap_slab_t* slab) {
auto & t = DeviceHeapSlabTable[slab->ElementSize];
assert(slab->Ptr);
DeviceHeapPtrTable.erase(slab->Ptr);
acceleratorFreeDeviceCUDA(slab->Ptr);
assert(t.count(slab) == 1);
t.erase(slab);
delete slab;
//std::cout << "Remove slab " << slab << std::endl;
}
void SlabFreeElement(grid_device_heap_slab_t* slab, void* ElementPtr) {
size_t Offset = (size_t)ElementPtr - (size_t)slab->Ptr;
//std::cout << "SlabFreeElement offset " << Offset << std::endl;
assert(Offset < GRID_DEVICE_HEAP_SLAB_SIZE);
assert(Offset % slab->ElementSize == 0);
size_t slot = Offset / slab->ElementSize;
assert(slot >= 0);
assert(slab->Allocated.count(slot) == 1 && slab->Available.count(slot) == 0);
slab->Allocated.erase(slot);
slab->Available.insert(slot);
//std::cout << "Free element " << slot << " of slab" << slab << std::endl;
if (slab->Allocated.empty()) {
SlabRemove(slab);
}
}
grid_device_heap_slab_t* SlabFind(size_t bytes) {
grid_device_heap_slab_t* slab = 0;
std::unordered_set<grid_device_heap_slab_t*>* slab_set = 0;
decltype(DeviceHeapSlabTable.begin()) slabs = DeviceHeapSlabTable.find(bytes);
if (slabs == DeviceHeapSlabTable.end()) {
slab_set = &DeviceHeapSlabTable[bytes];
} else {
slab_set = &slabs->second;
}
for (auto& s : *slab_set) {
if (!s->Available.empty()) {
slab = &(*s);
break;
}
}
if (!slab) {
slab = new grid_device_heap_slab_t;
slab_set->insert(slab);
slab->Ptr = acceleratorAllocDeviceCUDA(GRID_DEVICE_HEAP_SLAB_SIZE);
slab->ElementSize = bytes;
slab->Elements = GRID_DEVICE_HEAP_SLAB_SIZE / bytes;
for (size_t i=0;i<slab->Elements;i++)
slab->Available.insert(i);
//std::cout << "New slab" << slab << std::endl;
}
return slab;
}
void *acceleratorAllocDevice(size_t bytes) {
if (bytes >= GRID_DEVICE_HEAP_SLAB_THRESHOLD) {
return acceleratorAllocDeviceCUDA(bytes);
}
return SlabAllocateElement(SlabFind(bytes));
}
void acceleratorFreeDevice(void *ptr) {
auto p = DeviceHeapPtrTable.find(ptr);
if (p == DeviceHeapPtrTable.end()) {
acceleratorFreeDeviceCUDA(ptr);
} else {
SlabFreeElement(p->second,ptr);
}
}
#endif
NAMESPACE_END(Grid);