mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
MallocManaged in GPU
This commit is contained in:
parent
38eadee2c9
commit
b1c02ec310
@ -40,11 +40,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define POINTER_CACHE
|
||||||
|
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
// Move control to configure.ac and Config.h?
|
// Move control to configure.ac and Config.h?
|
||||||
#undef POINTER_CACHE
|
|
||||||
#ifdef POINTER_CACHE
|
#ifdef POINTER_CACHE
|
||||||
class PointerCache {
|
class PointerCache {
|
||||||
private:
|
private:
|
||||||
@ -163,22 +164,28 @@ public:
|
|||||||
#else
|
#else
|
||||||
pointer ptr = nullptr;
|
pointer ptr = nullptr;
|
||||||
#endif
|
#endif
|
||||||
// if ( ptr != NULL )
|
|
||||||
// std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;
|
|
||||||
|
|
||||||
//////////////////
|
//////////////////
|
||||||
// Hack 2MB align; could make option probably doesn't need configurability
|
// Hack 2MB align; could make option probably doesn't need configurability
|
||||||
//////////////////
|
//////////////////
|
||||||
//define GRID_ALLOC_ALIGN (128)
|
#ifdef GRID_NVCC
|
||||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
if ( ptr == (_Tp *) NULL ) {
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
if( cudaMallocManaged((void **)&ptr,bytes) != cudaSuccess ) {
|
||||||
|
ptr = (_Tp *) NULL;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
|
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
|
||||||
#else
|
#else
|
||||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
|
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
// std::cout << "alignedAllocator allocate " << std::hex << ptr <<std::dec <<std::endl;
|
assert( ptr != (_Tp *)NULL);
|
||||||
|
/////////////////////////////////////////
|
||||||
// First touch optimise in threaded loop
|
// First touch optimise in threaded loop
|
||||||
|
/////////////////////////////////////////
|
||||||
uint8_t *cp = (uint8_t *)ptr;
|
uint8_t *cp = (uint8_t *)ptr;
|
||||||
thread_loop( (size_type n=0;n<bytes;n+=4096) , {
|
thread_loop( (size_type n=0;n<bytes;n+=4096) , {
|
||||||
cp[n]=0;
|
cp[n]=0;
|
||||||
@ -196,14 +203,17 @@ public:
|
|||||||
#else
|
#else
|
||||||
pointer __freeme = __p;
|
pointer __freeme = __p;
|
||||||
#endif
|
#endif
|
||||||
// if ( __freeme ) {
|
|
||||||
// std::cout << "alignedAllocator free:" << std::hex << __p <<std::dec <<std::endl;
|
#ifdef GRID_NVCC
|
||||||
// }
|
if ( __freeme ) cudaFree((void *)__freeme);
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#else
|
||||||
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
if ( __freeme ) _mm_free((void *)__freeme);
|
if ( __freeme ) _mm_free((void *)__freeme);
|
||||||
#else
|
#else
|
||||||
if ( __freeme ) free((void *)__freeme);
|
if ( __freeme ) free((void *)__freeme);
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
void construct(pointer __p, const _Tp& __val) { };
|
void construct(pointer __p, const _Tp& __val) { };
|
||||||
void construct(pointer __p) { };
|
void construct(pointer __p) { };
|
||||||
@ -212,113 +222,6 @@ public:
|
|||||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
// Deprecate shmem and comm allocator
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// MPI3 : comms must use shm region
|
|
||||||
// SHMEM: comms must use symmetric heap
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
#ifdef GRID_COMMS_SHMEM
|
|
||||||
extern "C" {
|
|
||||||
#include <mpp/shmem.h>
|
|
||||||
extern void * shmem_align(size_t, size_t);
|
|
||||||
extern void shmem_free(void *);
|
|
||||||
}
|
|
||||||
#define PARANOID_SYMMETRIC_HEAP
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
template<typename _Tp>
|
|
||||||
class commAllocator {
|
|
||||||
public:
|
|
||||||
typedef std::size_t size_type;
|
|
||||||
typedef std::ptrdiff_t difference_type;
|
|
||||||
typedef _Tp* pointer;
|
|
||||||
typedef const _Tp* const_pointer;
|
|
||||||
typedef _Tp& reference;
|
|
||||||
typedef const _Tp& const_reference;
|
|
||||||
typedef _Tp value_type;
|
|
||||||
|
|
||||||
template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
|
|
||||||
commAllocator() throw() { }
|
|
||||||
commAllocator(const commAllocator&) throw() { }
|
|
||||||
template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
|
|
||||||
~commAllocator() throw() { }
|
|
||||||
pointer address(reference __x) const { return &__x; }
|
|
||||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
|
||||||
|
|
||||||
#ifdef GRID_COMMS_SHMEM
|
|
||||||
pointer allocate(size_type __n, const void* _p= 0)
|
|
||||||
{
|
|
||||||
size_type bytes = __n*sizeof(_Tp);
|
|
||||||
|
|
||||||
profilerAllocate(bytes);
|
|
||||||
#ifdef CRAY
|
|
||||||
_Tp *ptr = (_Tp *) shmem_align(bytes,64);
|
|
||||||
#else
|
|
||||||
_Tp *ptr = (_Tp *) shmem_align(64,bytes);
|
|
||||||
#endif
|
|
||||||
#ifdef PARANOID_SYMMETRIC_HEAP
|
|
||||||
static void * bcast;
|
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
|
||||||
|
|
||||||
bcast = (void *) ptr;
|
|
||||||
shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
|
|
||||||
|
|
||||||
if ( bcast != ptr ) {
|
|
||||||
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
|
||||||
// BACKTRACEFILE();
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
assert( bcast == (void *) ptr);
|
|
||||||
#endif
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
void deallocate(pointer __p, size_type __n) {
|
|
||||||
size_type bytes = __n*sizeof(_Tp);
|
|
||||||
|
|
||||||
profilerFree(bytes);
|
|
||||||
shmem_free((void *)__p);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
pointer allocate(size_type __n, const void* _p= 0)
|
|
||||||
{
|
|
||||||
size_type bytes = __n*sizeof(_Tp);
|
|
||||||
|
|
||||||
profilerAllocate(bytes);
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
|
||||||
_Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
|
|
||||||
#else
|
|
||||||
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
|
|
||||||
#endif
|
|
||||||
uint8_t *cp = (uint8_t *)ptr;
|
|
||||||
if ( ptr ) {
|
|
||||||
// One touch per 4k page, static OMP loop to catch same loop order
|
|
||||||
thread_loop( (size_type n=0;n<bytes;n+=4096),{
|
|
||||||
cp[n]=0;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
void deallocate(pointer __p, size_type __n) {
|
|
||||||
size_type bytes = __n*sizeof(_Tp);
|
|
||||||
|
|
||||||
profilerFree(bytes);
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
|
||||||
_mm_free((void *)__p);
|
|
||||||
#else
|
|
||||||
free((void *)__p);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
void construct(pointer __p, const _Tp& __val) { };
|
|
||||||
void construct(pointer __p) { };
|
|
||||||
void destroy(pointer __p) { };
|
|
||||||
};
|
|
||||||
template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
|
|
||||||
template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
|
|
||||||
#endif
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Template typedefs
|
// Template typedefs
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
Loading…
x
Reference in New Issue
Block a user