mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
MallocManaged in GPU
This commit is contained in:
parent
38eadee2c9
commit
b1c02ec310
@ -40,11 +40,12 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include <mm_malloc.h>
|
||||
#endif
|
||||
|
||||
#define POINTER_CACHE
|
||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Move control to configure.ac and Config.h?
|
||||
#undef POINTER_CACHE
|
||||
#ifdef POINTER_CACHE
|
||||
class PointerCache {
|
||||
private:
|
||||
@ -163,22 +164,28 @@ public:
|
||||
#else
|
||||
pointer ptr = nullptr;
|
||||
#endif
|
||||
// if ( ptr != NULL )
|
||||
// std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;
|
||||
|
||||
//////////////////
|
||||
// Hack 2MB align; could make option probably doesn't need configurability
|
||||
//////////////////
|
||||
//define GRID_ALLOC_ALIGN (128)
|
||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
#ifdef GRID_NVCC
|
||||
if ( ptr == (_Tp *) NULL ) {
|
||||
if( cudaMallocManaged((void **)&ptr,bytes) != cudaSuccess ) {
|
||||
ptr = (_Tp *) NULL;
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
|
||||
#else
|
||||
#else
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
|
||||
#endif
|
||||
#endif
|
||||
// std::cout << "alignedAllocator allocate " << std::hex << ptr <<std::dec <<std::endl;
|
||||
|
||||
assert( ptr != (_Tp *)NULL);
|
||||
/////////////////////////////////////////
|
||||
// First touch optimise in threaded loop
|
||||
/////////////////////////////////////////
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
thread_loop( (size_type n=0;n<bytes;n+=4096) , {
|
||||
cp[n]=0;
|
||||
@ -196,14 +203,17 @@ public:
|
||||
#else
|
||||
pointer __freeme = __p;
|
||||
#endif
|
||||
// if ( __freeme ) {
|
||||
// std::cout << "alignedAllocator free:" << std::hex << __p <<std::dec <<std::endl;
|
||||
// }
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
if ( __freeme ) cudaFree((void *)__freeme);
|
||||
#else
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( __freeme ) _mm_free((void *)__freeme);
|
||||
#else
|
||||
#else
|
||||
if ( __freeme ) free((void *)__freeme);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
@ -212,113 +222,6 @@ public:
|
||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
// Deprecate shmem and comm allocator
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MPI3 : comms must use shm region
|
||||
// SHMEM: comms must use symmetric heap
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
extern "C" {
|
||||
#include <mpp/shmem.h>
|
||||
extern void * shmem_align(size_t, size_t);
|
||||
extern void shmem_free(void *);
|
||||
}
|
||||
#define PARANOID_SYMMETRIC_HEAP
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
template<typename _Tp>
|
||||
class commAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
|
||||
commAllocator() throw() { }
|
||||
commAllocator(const commAllocator&) throw() { }
|
||||
template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
|
||||
~commAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerAllocate(bytes);
|
||||
#ifdef CRAY
|
||||
_Tp *ptr = (_Tp *) shmem_align(bytes,64);
|
||||
#else
|
||||
_Tp *ptr = (_Tp *) shmem_align(64,bytes);
|
||||
#endif
|
||||
#ifdef PARANOID_SYMMETRIC_HEAP
|
||||
static void * bcast;
|
||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||
|
||||
bcast = (void *) ptr;
|
||||
shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);
|
||||
|
||||
if ( bcast != ptr ) {
|
||||
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
||||
// BACKTRACEFILE();
|
||||
exit(0);
|
||||
}
|
||||
assert( bcast == (void *) ptr);
|
||||
#endif
|
||||
return ptr;
|
||||
}
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerFree(bytes);
|
||||
shmem_free((void *)__p);
|
||||
}
|
||||
#else
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerAllocate(bytes);
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
_Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
|
||||
#else
|
||||
_Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
|
||||
#endif
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
if ( ptr ) {
|
||||
// One touch per 4k page, static OMP loop to catch same loop order
|
||||
thread_loop( (size_type n=0;n<bytes;n+=4096),{
|
||||
cp[n]=0;
|
||||
});
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
|
||||
profilerFree(bytes);
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
_mm_free((void *)__p);
|
||||
#else
|
||||
free((void *)__p);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Template typedefs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
Loading…
x
Reference in New Issue
Block a user