1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-19 16:55:37 +01:00

Allocator cache split into large/small pools

This commit is contained in:
Peter Boyle 2020-05-10 05:24:26 -04:00
parent 2bb2c68e15
commit ea08f193e7
4 changed files with 15 additions and 5 deletions

View File

@ -236,7 +236,6 @@ public:
int vol=grid->gSites();
typedef typename Field::vector_type vector_type;
constexpr int Nsimd = vector_type::Nsimd();
Field T0(grid); T0 = in;
Field T1(grid);
@ -264,6 +263,7 @@ public:
auto Tn_v = Tn->View();
auto Tnp_v = Tnp->View();
auto Tnm_v = Tnm->View();
constexpr int Nsimd = vector_type::Nsimd();
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));

View File

@ -7,7 +7,11 @@ MemoryStats *MemoryProfiler::stats = nullptr;
bool MemoryProfiler::debug = false;
int PointerCache::NcacheSmall = PointerCache::NcacheSmallMax;
int PointerCache::Ncache = PointerCache::NcacheMax;
#ifdef GRID_CUDA
int PointerCache::Ncache = 32;
#else
int PointerCache::Ncache = 8;
#endif
int PointerCache::Victim;
int PointerCache::VictimSmall;
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::NcacheMax];
@ -16,12 +20,16 @@ PointerCache::PointerCacheEntry PointerCache::EntriesSmall[PointerCache::NcacheS
// Configure the sizes of the large and small pointer caches from the
// environment.
//
//   GRID_ALLOC_NCACHE_LARGE  -> Ncache       (limit: NcacheMax)
//   GRID_ALLOC_NCACHE_SMALL  -> NcacheSmall  (limit: NcacheSmallMax)
//
// When a variable is unset the current value is kept. In either case the
// resulting value is range-checked: anything negative or above the limit is
// replaced by the limit itself.
void PointerCache::Init(void)
{
  // Returns the cache count to use for environment variable `name`, given the
  // currently configured value and the compile-time limit.
  auto cacheCountFromEnv = [](const char *name, int current, int limit) -> int {
    long requested = current;
    const char *text = getenv(name);
    // strtol instead of atoi: atoi is undefined when the value does not fit
    // in an int, whereas strtol saturates at LONG_MIN/LONG_MAX, which the
    // range check below then maps onto the limit.  As with atoi, text that
    // contains no leading number parses as 0.
    if ( text ) requested = strtol(text, nullptr, 10);
    // Compare as long, before narrowing, so oversized requests cannot wrap.
    if ( (requested < 0) || (requested > limit) ) return limit;
    return (int)requested;
  };

  Ncache      = cacheCountFromEnv("GRID_ALLOC_NCACHE_LARGE", Ncache,      NcacheMax);
  NcacheSmall = cacheCountFromEnv("GRID_ALLOC_NCACHE_SMALL", NcacheSmall, NcacheSmallMax);
  // printf("Aligned allocator cache: large %d/%d small %d/%d\n",Ncache,NcacheMax,NcacheSmall,NcacheSmallMax);
}
void *PointerCache::Insert(void *ptr,size_t bytes)
{

View File

@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){
if (heap_bytes >= heap_size) {
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
std::cout<< " Current bytes is " << (heap_bytes/(1024*1024)) <<"MB"<<std::endl;
std::cout<< " Current heap is " << (heap_size/(1024*1024)) <<"MB"<<std::endl;
assert(heap_bytes<heap_size);
}
//std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;

View File

@ -47,7 +47,7 @@ int main (int argc, char ** argv)
const int Ls=12;
std::vector< std::vector<int> > latts;
#if 0
#if 1
latts.push_back(std::vector<int> ({24,24,24,24}) );
latts.push_back(std::vector<int> ({48,24,24,24}) );
latts.push_back(std::vector<int> ({96,24,24,24}) );
@ -157,7 +157,7 @@ void benchDw(std::vector<int> & latt4, int Ls)
std::cout <<"\t"<<flops/(t1-t0)<<"\t"<<(t1-t0)/1000./1000.<<" s\t";
// Cheby uses MpcDagMpc so 2x flops
for(int i=0;i<100;i++){
for(int i=0;i<1;i++){
Cheby(Mpc,src_o,r_o);
t0=usecond();
Cheby(Mpc,src_o,r_o);