mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 07:47:06 +01:00
Merge branch 'develop' into feature/gpu-port
This commit is contained in:
127
Grid/allocator/AlignedAllocator.cc
Normal file
127
Grid/allocator/AlignedAllocator.cc
Normal file
@ -0,0 +1,127 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
MemoryStats *MemoryProfiler::stats = nullptr;
|
||||
bool MemoryProfiler::debug = false;
|
||||
|
||||
#ifdef POINTER_CACHE
|
||||
int PointerCache::victim;
|
||||
|
||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
|
||||
|
||||
void *PointerCache::Insert(void *ptr,size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return ptr;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
void * ret = NULL;
|
||||
int v = -1;
|
||||
|
||||
for(int e=0;e<Ncache;e++) {
|
||||
if ( Entries[e].valid==0 ) {
|
||||
v=e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( v==-1 ) {
|
||||
v=victim;
|
||||
victim = (victim+1)%Ncache;
|
||||
}
|
||||
|
||||
if ( Entries[v].valid ) {
|
||||
ret = Entries[v].address;
|
||||
Entries[v].valid = 0;
|
||||
Entries[v].address = NULL;
|
||||
Entries[v].bytes = 0;
|
||||
}
|
||||
|
||||
Entries[v].address=ptr;
|
||||
Entries[v].bytes =bytes;
|
||||
Entries[v].valid =1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *PointerCache::Lookup(size_t bytes) {
|
||||
|
||||
if (bytes < 4096 ) return NULL;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
assert(omp_in_parallel()==0);
|
||||
#endif
|
||||
|
||||
for(int e=0;e<Ncache;e++){
|
||||
if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
|
||||
Entries[e].valid = 0;
|
||||
return Entries[e].address;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES)
|
||||
{
|
||||
#ifdef __linux__
|
||||
int fd = open("/proc/self/pagemap", O_RDONLY);
|
||||
assert(fd >= 0);
|
||||
const int page_size = 4096;
|
||||
uint64_t virt_pfn = (uint64_t)Buf / page_size;
|
||||
off_t offset = sizeof(uint64_t) * virt_pfn;
|
||||
uint64_t npages = (BYTES + page_size-1) / page_size;
|
||||
uint64_t pagedata[npages];
|
||||
uint64_t ret = lseek(fd, offset, SEEK_SET);
|
||||
assert(ret == offset);
|
||||
ret = ::read(fd, pagedata, sizeof(uint64_t)*npages);
|
||||
assert(ret == sizeof(uint64_t) * npages);
|
||||
int nhugepages = npages / 512;
|
||||
int n4ktotal, nnothuge;
|
||||
n4ktotal = 0;
|
||||
nnothuge = 0;
|
||||
for (int i = 0; i < nhugepages; ++i) {
|
||||
uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size;
|
||||
for (int j = 0; j < 512; ++j) {
|
||||
uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size;
|
||||
++n4ktotal;
|
||||
if (pageaddr != baseaddr + j * page_size)
|
||||
++nnothuge;
|
||||
}
|
||||
}
|
||||
int rank = CartesianCommunicator::RankWorld();
|
||||
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string sizeString(const size_t bytes)
|
||||
{
|
||||
constexpr unsigned int bufSize = 256;
|
||||
const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
|
||||
char buf[256];
|
||||
size_t s = 0;
|
||||
double count = bytes;
|
||||
|
||||
while (count >= 1024 && s < 7)
|
||||
{
|
||||
s++;
|
||||
count /= 1024;
|
||||
}
|
||||
if (count - floor(count) == 0.0)
|
||||
{
|
||||
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
|
||||
}
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
240
Grid/allocator/AlignedAllocator.h
Normal file
240
Grid/allocator/AlignedAllocator.h
Normal file
@ -0,0 +1,240 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/AlignedAllocator.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_ALIGNED_ALLOCATOR_H
|
||||
#define GRID_ALIGNED_ALLOCATOR_H
|
||||
|
||||
#ifdef HAVE_MALLOC_MALLOC_H
|
||||
#include <malloc/malloc.h>
|
||||
#endif
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
#include <mm_malloc.h>
|
||||
#endif
|
||||
|
||||
#define POINTER_CACHE
|
||||
#define GRID_ALLOC_ALIGN (2*1024*1024)
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
// Move control to configure.ac and Config.h?
|
||||
#ifdef POINTER_CACHE
|
||||
class PointerCache {
|
||||
private:
|
||||
|
||||
static const int Ncache=8;
|
||||
static int victim;
|
||||
|
||||
typedef struct {
|
||||
void *address;
|
||||
size_t bytes;
|
||||
int valid;
|
||||
} PointerCacheEntry;
|
||||
|
||||
static PointerCacheEntry Entries[Ncache];
|
||||
|
||||
public:
|
||||
|
||||
|
||||
static void *Insert(void *ptr,size_t bytes) ;
|
||||
static void *Lookup(size_t bytes) ;
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
std::string sizeString(size_t bytes);
|
||||
|
||||
struct MemoryStats
|
||||
{
|
||||
size_t totalAllocated{0}, maxAllocated{0},
|
||||
currentlyAllocated{0}, totalFreed{0};
|
||||
};
|
||||
|
||||
class MemoryProfiler
|
||||
{
|
||||
public:
|
||||
static MemoryStats *stats;
|
||||
static bool debug;
|
||||
};
|
||||
|
||||
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
|
||||
#define profilerDebugPrint \
|
||||
if (MemoryProfiler::stats) \
|
||||
{ \
|
||||
auto s = MemoryProfiler::stats; \
|
||||
std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \
|
||||
<< std::endl; \
|
||||
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
|
||||
<< std::endl; \
|
||||
}
|
||||
|
||||
#define profilerAllocate(bytes) \
|
||||
if (MemoryProfiler::stats) \
|
||||
{ \
|
||||
auto s = MemoryProfiler::stats; \
|
||||
s->totalAllocated += (bytes); \
|
||||
s->currentlyAllocated += (bytes); \
|
||||
s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \
|
||||
} \
|
||||
if (MemoryProfiler::debug) \
|
||||
{ \
|
||||
std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \
|
||||
profilerDebugPrint; \
|
||||
}
|
||||
|
||||
#define profilerFree(bytes) \
|
||||
if (MemoryProfiler::stats) \
|
||||
{ \
|
||||
auto s = MemoryProfiler::stats; \
|
||||
s->totalFreed += (bytes); \
|
||||
s->currentlyAllocated -= (bytes); \
|
||||
} \
|
||||
if (MemoryProfiler::debug) \
|
||||
{ \
|
||||
std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \
|
||||
profilerDebugPrint; \
|
||||
}
|
||||
|
||||
void check_huge_pages(void *Buf,uint64_t BYTES);
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// A lattice of something, but assume the something is SIMDized.
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename _Tp>
|
||||
class alignedAllocator {
|
||||
public:
|
||||
typedef std::size_t size_type;
|
||||
typedef std::ptrdiff_t difference_type;
|
||||
typedef _Tp* pointer;
|
||||
typedef const _Tp* const_pointer;
|
||||
typedef _Tp& reference;
|
||||
typedef const _Tp& const_reference;
|
||||
typedef _Tp value_type;
|
||||
|
||||
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
||||
alignedAllocator() throw() { }
|
||||
alignedAllocator(const alignedAllocator&) throw() { }
|
||||
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
||||
~alignedAllocator() throw() { }
|
||||
pointer address(reference __x) const { return &__x; }
|
||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
size_type bytes = __n*sizeof(_Tp);
|
||||
profilerAllocate(bytes);
|
||||
|
||||
|
||||
#ifdef POINTER_CACHE
|
||||
_Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
|
||||
#else
|
||||
pointer ptr = nullptr;
|
||||
#endif
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
////////////////////////////////////
|
||||
// Unified (managed) memory
|
||||
////////////////////////////////////
|
||||
if ( ptr == (_Tp *) NULL ) {
|
||||
auto err = cudaMallocManaged((void **)&ptr,bytes);
|
||||
if( err != cudaSuccess ) {
|
||||
ptr = (_Tp *) NULL;
|
||||
std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// 2MB align; could make option probably doesn't need configurability
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
|
||||
#else
|
||||
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN,bytes);
|
||||
#endif
|
||||
#endif
|
||||
assert( ptr != (_Tp *)NULL);
|
||||
|
||||
/////////////////////////////////////////
|
||||
// First touch optimise in threaded loop
|
||||
/////////////////////////////////////////
|
||||
uint8_t *cp = (uint8_t *)ptr;
|
||||
thread_loop( (size_type n=0;n<bytes;n+=4096) , {
|
||||
cp[n]=0;
|
||||
});
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void deallocate(pointer __p, size_type __n) {
|
||||
size_type bytes = __n * sizeof(_Tp);
|
||||
|
||||
profilerFree(bytes);
|
||||
|
||||
#ifdef POINTER_CACHE
|
||||
pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
|
||||
#else
|
||||
pointer __freeme = __p;
|
||||
#endif
|
||||
|
||||
#ifdef GRID_NVCC
|
||||
if ( __freeme ) cudaFree((void *)__freeme);
|
||||
#else
|
||||
#ifdef HAVE_MM_MALLOC_H
|
||||
if ( __freeme ) _mm_free((void *)__freeme);
|
||||
#else
|
||||
if ( __freeme ) free((void *)__freeme);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
void construct(pointer __p, const _Tp& __val) { };
|
||||
void construct(pointer __p) { };
|
||||
void destroy(pointer __p) { };
|
||||
};
|
||||
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Template typedefs
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template<class T> using commAllocator = alignedAllocator<T>;
|
||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
|
||||
template<class T> using commVector = std::vector<T,alignedAllocator<T> >;
|
||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user