
summit mem

Christoph Lehner 2020-05-18 17:52:26 -04:00
parent 32fbdf4fb1
commit a7635fd5ba
6 changed files with 197 additions and 27 deletions

View File

@@ -47,6 +47,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/util/Util.h>
#include <Grid/log/Log.h>
#include <Grid/allocator/GridMemoryManager.h>
#include <Grid/allocator/AlignedAllocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/threads/Threads.h>

View File

@@ -178,12 +178,13 @@ public:
////////////////////////////////////
if ( ptr == (_Tp *) NULL ) {
// printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout);
auto err = cudaMallocManaged((void **)&ptr,bytes);
if( err != cudaSuccess ) {
// auto err =
gridMallocManaged((void **)&ptr,bytes);
/*if( err != cudaSuccess ) {
ptr = (_Tp *) NULL;
std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
assert(0);
}
}*/
}
assert( ptr != (_Tp *)NULL);
#else
@@ -220,7 +221,7 @@ public:
#endif
#ifdef GRID_NVCC
if ( __freeme ) cudaFree((void *)__freeme);
if ( __freeme ) gridFree((void *)__freeme);
#else
#ifdef HAVE_MM_MALLOC_H
if ( __freeme ) _mm_free((void *)__freeme);

View File

@@ -0,0 +1,131 @@
#include <Grid/GridCore.h>
NAMESPACE_BEGIN(Grid);
#define _GRID_MEM_PAGE_SIZE 4096
void* _grid_mem_base = 0;
size_t _grid_mem_pages;
struct _grid_mem_range {
size_t page_start, page_end;
};
std::vector<_grid_mem_range> _grid_mem_avail;
std::map<void*,_grid_mem_range> _grid_mem_alloc;
void gridMemoryInit() {
size_t free,total;
cudaMemGetInfo(&free,&total);
char* ev = getenv("GRID_DEVICE_BYTES_FOR_CACHE");
if (ev) {
long bytes;
assert(sscanf(ev,"%ld",&bytes)==1);
free -= bytes;
}
_grid_mem_pages = free / _GRID_MEM_PAGE_SIZE;
size_t sz = _grid_mem_pages * _GRID_MEM_PAGE_SIZE;
assert(cudaSuccess == cudaMallocManaged(&_grid_mem_base,sz));
int target;
cudaGetDevice(&target);
cudaMemAdvise(_grid_mem_base,sz,cudaMemAdviseSetPreferredLocation,target);
assert(cudaSuccess == cudaMemset(_grid_mem_base,0,sz)); // touch on device
std::cout << GridLogMessage << "gridMemoryInit: " << sz << " bytes" << std::endl;
_grid_mem_avail.push_back( { 0, _grid_mem_pages } );
}
void gridMallocManaged(void** pp, size_t sz) {
if (_grid_mem_avail.empty())
gridMemoryInit();
size_t pages = (sz + _GRID_MEM_PAGE_SIZE - 1) / _GRID_MEM_PAGE_SIZE;
// find free block
size_t m;
for (m=0;m<_grid_mem_avail.size();m++) {
auto & b = _grid_mem_avail[m];
if (b.page_end - b.page_start >= pages)
break;
}
if (m == _grid_mem_avail.size()) {
std::cout << GridLogMessage << "Out of memory" << std::endl;
assert(0);
}
*pp = (char*)_grid_mem_base + _GRID_MEM_PAGE_SIZE*_grid_mem_avail[m].page_start;
_grid_mem_alloc[*pp] = { _grid_mem_avail[m].page_start, _grid_mem_avail[m].page_start + pages };
_grid_mem_avail[m].page_start += pages;
}
void gridFree(void* p) {
if (_grid_mem_avail.empty())
gridMemoryInit();
auto & alloc = _grid_mem_alloc[p];
if (alloc.page_start == alloc.page_end) {
free(p);
//cudaFreeHost(p);
} else {
// can we enlarge existing one?
for (size_t m=0;m<_grid_mem_avail.size();m++) {
auto & b = _grid_mem_avail[m];
if (b.page_start == alloc.page_end) {
b.page_start = alloc.page_start;
return;
}
if (b.page_end == alloc.page_start) {
b.page_end = alloc.page_end;
return;
}
}
// fragment memory
_grid_mem_avail.push_back( alloc );
}
_grid_mem_alloc.erase(p);
}
void gridAcceleratorPrefetch(void* p, size_t sz) {
auto & alloc = _grid_mem_alloc[p];
if (alloc.page_start == alloc.page_end) // pinned to host
return;
int target;
cudaGetDevice(&target);
cudaMemPrefetchAsync(p,sz,target);
}
void gridMemGetInfo(size_t* pfree, size_t* ptotal) {
if (_grid_mem_avail.empty())
gridMemoryInit();
*ptotal = _grid_mem_pages * _GRID_MEM_PAGE_SIZE;
*pfree = 0;
for (auto & a : _grid_mem_avail)
*pfree += (a.page_end - a.page_start) * _GRID_MEM_PAGE_SIZE;
}
void gridMoveToHost(void** pp) {
if (_grid_mem_avail.empty())
gridMemoryInit();
auto & alloc = _grid_mem_alloc[*pp];
if (alloc.page_start == alloc.page_end) // already on host
return;
size_t sz = (alloc.page_end - alloc.page_start) * _GRID_MEM_PAGE_SIZE;
void*pn;
//assert(cudaSuccess == cudaMallocHost(&pn,sz));
pn = malloc(sz);
memcpy(pn,*pp,sz);
gridFree(*pp);
*pp = pn;
_grid_mem_alloc[pn] = { 0,0 };
}
NAMESPACE_END(Grid);
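
For orientation: the new manager reserves one large cudaMallocManaged arena up front, splits it into 4096-byte pages, and serves requests with a first-fit scan over _grid_mem_avail; gridFree merges a released range into an adjacent free block when it can. Below is a minimal caller-side sketch of the API. It is not part of the commit; the buffer size, function name, and the use of GRID_DEVICE_BYTES_FOR_CACHE are illustrative assumptions.

#include <Grid/GridCore.h>
using namespace Grid;

// Hypothetical usage sketch (not from this commit). Assumes a CUDA device is
// present and, optionally, GRID_DEVICE_BYTES_FOR_CACHE was exported so the
// arena leaves that many device bytes untouched.
void poolDemo() {
  size_t bytes = 16 * 1024 * 1024;       // rounded up internally to 4096-byte pages
  void* buf = nullptr;
  gridMallocManaged(&buf, bytes);        // first-fit block carved from the managed arena
  gridAcceleratorPrefetch(buf, bytes);   // migrate the pages to the current device
  // ... device kernels read/write buf ...
  gridMoveToHost(&buf);                  // demote to a plain host allocation; the pointer may change
  gridFree(buf);                         // host-side path: releases the copied host buffer
  size_t free_bytes, total_bytes;
  gridMemGetInfo(&free_bytes, &total_bytes);  // bytes still available in / total size of the arena
}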

View File

@@ -0,0 +1,42 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/GridMemoryManager.h
Copyright (C) 2020
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MEMORY_MANAGER_H
#define GRID_MEMORY_MANAGER_H
NAMESPACE_BEGIN(Grid);
void gridMemoryInit();
void gridMallocManaged(void** pp, size_t sz);
void gridMoveToHost(void** pp);
void gridAcceleratorPrefetch(void* p, size_t sz);
void gridMemGetInfo(size_t* pfree, size_t* ptotal);
void gridFree(void* p);
NAMESPACE_END(Grid);
#endif

View File

@@ -97,33 +97,14 @@ public:
else grid = _grid;
};
accelerator_inline void Advise(int advise) {
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
if (advise & AdviseInfrequentUse) {
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
}
if (advise & AdviseReadMostly) {
cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
}
#endif
#endif
};
accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
int target;
cudaGetDevice(&target);
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),target);
#endif
#endif
gridAcceleratorPrefetch(_odata,_odata_size*sizeof(vobj));
};
accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
//cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
#endif
#endif
};
@@ -246,13 +227,27 @@ private:
dealloc();
this->_odata_size = size;
if ( size )
this->_odata = alloc.allocate(this->_odata_size);
else
this->_odata = nullptr;
}
}
public:
void Advise(int advise) {
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
if (advise & AdviseInfrequentUse) {
gridMoveToHost((void**)&this->_odata);
}
if (advise & AdviseReadMostly) {
//cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
}
#endif
#endif
};
/////////////////////////////////////////////////////////////////////////////////
// Return a view object that may be dereferenced in site loops.
// The view is trivially copy constructible and may be copied to an accelerator device
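
With the allocator now drawing from this pool, a Lattice field can be parked on the host through the new public Lattice::Advise and is prefetched back through the view-level AcceleratorPrefetch shown above. A minimal sketch follows; it is not part of the commit, and the field type, grid pointer, and function name are illustrative assumptions.

#include <Grid/Grid.h>
using namespace Grid;

// Hypothetical sketch (not from this commit); names are illustrative.
void parkTemporary(GridCartesian* grid) {
  LatticeComplexD tmp(grid);        // backing store comes out of the managed arena via alignedAllocator
  tmp.Advise(AdviseInfrequentUse);  // new Lattice::Advise path: gridMoveToHost demotes the field to host memory
  // A later view-level AcceleratorPrefetch() forwards to gridAcceleratorPrefetch(),
  // which returns immediately while the data stays parked on the host.
}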

View File

@@ -96,7 +96,7 @@ accelerator_inline void convertType(ComplexF & out, const std::complex<float> &
out = in;
}
#ifdef __CUDA_ARCH__
#ifdef GRID_NVCC
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in;
}