Mirror of https://github.com/paboyle/Grid.git

Commit a7635fd5ba ("summit mem"), parent 32fbdf4fb1
@@ -47,6 +47,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/util/Util.h>
#include <Grid/log/Log.h>
#include <Grid/allocator/GridMemoryManager.h>
#include <Grid/allocator/AlignedAllocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/threads/Threads.h>
@@ -178,12 +178,13 @@ public:
      ////////////////////////////////////
      if ( ptr == (_Tp *) NULL ) {
	//	printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout);
	auto err = cudaMallocManaged((void **)&ptr,bytes);
	if( err != cudaSuccess ) {
	// auto err =
	gridMallocManaged((void **)&ptr,bytes);
	/*if( err != cudaSuccess ) {
	  ptr = (_Tp *) NULL;
	  std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
	  assert(0);
	}
	}*/
      }
      assert( ptr != (_Tp *)NULL);
#else
@@ -220,7 +221,7 @@ public:
#endif

#ifdef GRID_NVCC
      if ( __freeme ) cudaFree((void *)__freeme);
      if ( __freeme ) gridFree((void *)__freeme);
#else
  #ifdef HAVE_MM_MALLOC_H
      if ( __freeme ) _mm_free((void *)__freeme);
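The two hunks above reroute the CUDA branch of the aligned allocator: cudaMallocManaged is replaced by gridMallocManaged and cudaFree by gridFree, so lattice storage is now carved out of the page pool introduced below rather than allocated per object through the driver. A minimal sketch of the resulting allocate/deallocate flow, simplified from the templated AlignedAllocator and intended only as an illustration within the Grid tree:

#include <Grid/GridCore.h>
#include <cassert>
#include <cstddef>

// Illustrative only: the real code lives in the AlignedAllocator shown above.
template<typename _Tp>
_Tp* pool_allocate(std::size_t n) {
  _Tp* ptr = (_Tp *) NULL;
  std::size_t bytes = n * sizeof(_Tp);
  Grid::gridMallocManaged((void **)&ptr, bytes);   // carve pages out of the managed pool
  assert(ptr != (_Tp *) NULL);
  return ptr;
}

template<typename _Tp>
void pool_deallocate(_Tp* __freeme) {
  if (__freeme) Grid::gridFree((void *)__freeme);  // return the pages to the pool
}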
Grid/allocator/GridMemoryManager.cc (new file, 131 lines)
@@ -0,0 +1,131 @@
#include <Grid/GridCore.h>

NAMESPACE_BEGIN(Grid);

#define _GRID_MEM_PAGE_SIZE 4096
void* _grid_mem_base = 0;
size_t _grid_mem_pages;
struct _grid_mem_range {
  size_t page_start, page_end;
};
std::vector<_grid_mem_range> _grid_mem_avail;
std::map<void*,_grid_mem_range> _grid_mem_alloc;

void gridMemoryInit() {
  size_t free,total;
  cudaMemGetInfo(&free,&total);

  char* ev = getenv("GRID_DEVICE_BYTES_FOR_CACHE");
  if (ev) {
    long bytes;
    assert(sscanf(ev,"%ld",&bytes)==1);
    free -= bytes;
  }

  _grid_mem_pages = free / _GRID_MEM_PAGE_SIZE;
  size_t sz = _grid_mem_pages * _GRID_MEM_PAGE_SIZE;

  assert(cudaSuccess == cudaMallocManaged(&_grid_mem_base,sz));

  int target;
  cudaGetDevice(&target);
  cudaMemAdvise(_grid_mem_base,sz,cudaMemAdviseSetPreferredLocation,target);

  assert(cudaSuccess == cudaMemset(_grid_mem_base,0,sz)); // touch on device
  std::cout << GridLogMessage << "gridMemoryInit: " << sz << " bytes" << std::endl;

  _grid_mem_avail.push_back( { 0, _grid_mem_pages } );
}

void gridMallocManaged(void** pp, size_t sz) {

  if (_grid_mem_avail.empty())
    gridMemoryInit();

  size_t pages = (sz + _GRID_MEM_PAGE_SIZE - 1) / _GRID_MEM_PAGE_SIZE;
  // find free block
  size_t m;
  for (m=0;m<_grid_mem_avail.size();m++) {
    auto & b = _grid_mem_avail[m];
    if (b.page_end - b.page_start >= pages)
      break;
  }
  if (m == _grid_mem_avail.size()) {
    std::cout << GridLogMessage << "Out of memory" << std::endl;
    assert(0);
  }
  *pp = (char*)_grid_mem_base + _GRID_MEM_PAGE_SIZE*_grid_mem_avail[m].page_start;
  _grid_mem_alloc[*pp] = { _grid_mem_avail[m].page_start, _grid_mem_avail[m].page_start + pages };
  _grid_mem_avail[m].page_start += pages;
}

void gridFree(void* p) {

  if (_grid_mem_avail.empty())
    gridMemoryInit();

  auto & alloc = _grid_mem_alloc[p];
  if (alloc.page_start == alloc.page_end) {
    free(p);
    //cudaFreeHost(p);
  } else {
    // can we enlarge existing one?
    for (size_t m=0;m<_grid_mem_avail.size();m++) {
      auto & b = _grid_mem_avail[m];
      if (b.page_start == alloc.page_end) {
	b.page_start = alloc.page_start;
	return;
      }
      if (b.page_end == alloc.page_start) {
	b.page_end = alloc.page_end;
	return;
      }
    }
    // fragment memory
    _grid_mem_avail.push_back( alloc );
  }
  _grid_mem_alloc.erase(p);
}

void gridAcceleratorPrefetch(void* p, size_t sz) {

  auto & alloc = _grid_mem_alloc[p];
  if (alloc.page_start == alloc.page_end) // pinned to host
    return;

  int target;
  cudaGetDevice(&target);
  cudaMemPrefetchAsync(p,sz,target);
}

void gridMemGetInfo(size_t* pfree, size_t* ptotal) {

  if (_grid_mem_avail.empty())
    gridMemoryInit();

  *ptotal = _grid_mem_pages * _GRID_MEM_PAGE_SIZE;
  *pfree = 0;
  for (auto & a : _grid_mem_avail)
    *pfree += (a.page_end - a.page_start) * _GRID_MEM_PAGE_SIZE;
}

void gridMoveToHost(void** pp) {

  if (_grid_mem_avail.empty())
    gridMemoryInit();

  auto & alloc = _grid_mem_alloc[*pp];
  if (alloc.page_start == alloc.page_end) // already on host
    return;

  size_t sz = (alloc.page_end - alloc.page_start) * _GRID_MEM_PAGE_SIZE;
  void* pn;
  //assert(cudaSuccess == cudaMallocHost(&pn,sz));
  pn = malloc(sz);
  memcpy(pn,*pp,sz);
  gridFree(*pp);
  *pp = pn;
  _grid_mem_alloc[pn] = { 0,0 };
}

NAMESPACE_END(Grid);
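The manager above reserves (almost) all free device memory once as a single cudaMallocManaged arena and then hands out whole 4096-byte pages by a first-fit scan over _grid_mem_avail; on gridFree a range is merged into an adjacent free range when one exists and otherwise appended as a new fragment, while host-resident buffers are tracked as the empty range {0,0}. The following host-only sketch mirrors that bookkeeping with hypothetical names (Range, avail, live) and no CUDA, just to make the page arithmetic explicit; for example, a 10000-byte request rounds up to three 4096-byte pages:

#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

static const std::size_t kPage = 4096;
struct Range { std::size_t begin, end; };              // half-open page interval

std::vector<Range> avail = { {0, 1024} };              // start with 1024 free pages
std::map<std::size_t, Range> live;                     // first page -> allocated range

std::size_t alloc_pages(std::size_t bytes) {
  std::size_t pages = (bytes + kPage - 1) / kPage;     // round up to whole pages
  for (auto & b : avail) {
    if (b.end - b.begin >= pages) {                    // first fit
      std::size_t start = b.begin;
      live[start] = { start, start + pages };
      b.begin += pages;                                // shrink the free range from the front
      return start;
    }
  }
  assert(0 && "out of pool pages");
  return 0;
}

void free_pages(std::size_t start) {
  Range r = live[start];
  live.erase(start);
  for (auto & b : avail) {                             // merge with a neighbouring free range
    if (b.begin == r.end)   { b.begin = r.begin; return; }
    if (b.end   == r.begin) { b.end   = r.end;   return; }
  }
  avail.push_back(r);                                  // otherwise keep it as a separate fragment
}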
Grid/allocator/GridMemoryManager.h (new file, 42 lines)
@@ -0,0 +1,42 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/GridMemoryManager.h

    Copyright (C) 2020

Author: Christoph Lehner <christoph@lhnr.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MEMORY_MANAGER_H
#define GRID_MEMORY_MANAGER_H

NAMESPACE_BEGIN(Grid);

void gridMemoryInit();
void gridMallocManaged(void** pp, size_t sz);
void gridMoveToHost(void** pp);
void gridAcceleratorPrefetch(void* p, size_t sz);
void gridMemGetInfo(size_t* pfree, size_t* ptotal);
void gridFree(void* p);

NAMESPACE_END(Grid);

#endif
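The header exposes the pool through six free functions in namespace Grid. A possible call sequence in a CUDA (GRID_NVCC) build is sketched below; the function names come from the header above, while the buffer size and the order of calls are purely illustrative:

#include <Grid/GridCore.h>
#include <cstdio>

void example() {
  void* buf = nullptr;
  Grid::gridMallocManaged(&buf, 1024*1024);       // carve 256 pages (1 MiB) out of the managed pool

  Grid::gridAcceleratorPrefetch(buf, 1024*1024);  // hint: migrate the pages to the current device

  size_t freeB, totalB;
  Grid::gridMemGetInfo(&freeB, &totalB);
  std::printf("pool: %zu of %zu bytes free\n", freeB, totalB);

  Grid::gridMoveToHost(&buf);                     // copy into a plain host allocation; buf is updated
  Grid::gridFree(buf);                            // host-resident entries ({0,0}) are released with free()
}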
@@ -97,33 +97,14 @@ public:
    else grid = _grid;
  };

  accelerator_inline void Advise(int advise) {
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
    if (advise & AdviseInfrequentUse) {
      cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetPreferredLocation,cudaCpuDeviceId);
    }
    if (advise & AdviseReadMostly) {
      cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
    }
#endif
#endif
  };

  accelerator_inline void AcceleratorPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
    int target;
    cudaGetDevice(&target);
    cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),target);
#endif
#endif
    gridAcceleratorPrefetch(_odata,_odata_size*sizeof(vobj));
  };

  accelerator_inline void HostPrefetch(int accessMode = ViewReadWrite) { // will use accessMode in future
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
    cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
    //cudaMemPrefetchAsync(_odata,_odata_size*sizeof(vobj),cudaCpuDeviceId);
#endif
#endif
  };
@@ -253,6 +234,20 @@ private:
    }
  }
public:

  void Advise(int advise) {
#ifdef GRID_NVCC
#ifndef __CUDA_ARCH__ // only on host
    if (advise & AdviseInfrequentUse) {
      gridMoveToHost((void**)&this->_odata);
    }
    if (advise & AdviseReadMostly) {
      //cudaMemAdvise(_odata,_odata_size*sizeof(vobj),cudaMemAdviseSetReadMostly,-1);
    }
#endif
#endif
  };

  /////////////////////////////////////////////////////////////////////////////////
  // Return a view object that may be dereferenced in site loops.
  // The view is trivially copy constructible and may be copied to an accelerator device
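Taken together, the two Lattice hunks change what AdviseInfrequentUse means: instead of only setting a cudaMemAdvise preference on the view, it now physically relocates the field's storage to plain host memory through gridMoveToHost, after which gridAcceleratorPrefetch (and hence AcceleratorPrefetch on a view of that field) returns early. A hedged usage fragment, assuming a CUDA build and a grid object created elsewhere; the field type and variable names are placeholders, not part of this commit:

// Illustrative only; 'grid' stands for a GridCartesian* set up elsewhere.
LatticeComplex phase(grid);
phase.Advise(AdviseInfrequentUse);  // relocate the field's _odata to host memory via gridMoveToHost
// A later AcceleratorPrefetch() on a view of 'phase' is then a no-op:
// gridAcceleratorPrefetch() sees the host-resident ({0,0}) range and returns early.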
@@ -96,7 +96,7 @@ accelerator_inline void convertType(ComplexF & out, const std::complex<float> &
  out = in;
}

#ifdef __CUDA_ARCH__
#ifdef GRID_NVCC
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
  ((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in;
}
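The last hunk widens the guard around the SIMT-lane convertType overload from __CUDA_ARCH__ (defined only during device compilation passes) to GRID_NVCC (defined for any nvcc build of Grid), so the overload is visible on the host pass as well. The overload writes one scalar into the lane of the vectorised type owned by the current GPU thread; a plain C++ sketch of that indexing, with a stand-in type and an assumed lane count of 8 purely for illustration:

#include <complex>

using ComplexF_t = std::complex<float>;
struct vComplexF_like { ComplexF_t v[8]; };   // stand-in for vComplexF with 8 lanes

// Same pointer-cast indexing as the convertType overload above: each lane
// owns one scalar slot inside the vector register image.
inline void write_lane(vComplexF_like & out, const ComplexF_t & in, int lane) {
  ((ComplexF_t*)&out)[lane] = in;
}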