mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Allocator cache split into large/small pools
This commit is contained in:
parent
2bb2c68e15
commit
ea08f193e7
@ -236,7 +236,6 @@ public:
|
|||||||
|
|
||||||
int vol=grid->gSites();
|
int vol=grid->gSites();
|
||||||
typedef typename Field::vector_type vector_type;
|
typedef typename Field::vector_type vector_type;
|
||||||
constexpr int Nsimd = vector_type::Nsimd();
|
|
||||||
|
|
||||||
Field T0(grid); T0 = in;
|
Field T0(grid); T0 = in;
|
||||||
Field T1(grid);
|
Field T1(grid);
|
||||||
@ -264,6 +263,7 @@ public:
|
|||||||
auto Tn_v = Tn->View();
|
auto Tn_v = Tn->View();
|
||||||
auto Tnp_v = Tnp->View();
|
auto Tnp_v = Tnp->View();
|
||||||
auto Tnm_v = Tnm->View();
|
auto Tnm_v = Tnm->View();
|
||||||
|
constexpr int Nsimd = vector_type::Nsimd();
|
||||||
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
|
accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, {
|
||||||
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
|
||||||
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
|
||||||
|
@ -7,7 +7,11 @@ MemoryStats *MemoryProfiler::stats = nullptr;
|
|||||||
bool MemoryProfiler::debug = false;
|
bool MemoryProfiler::debug = false;
|
||||||
|
|
||||||
int PointerCache::NcacheSmall = PointerCache::NcacheSmallMax;
|
int PointerCache::NcacheSmall = PointerCache::NcacheSmallMax;
|
||||||
int PointerCache::Ncache = PointerCache::NcacheMax;
|
#ifdef GRID_CUDA
|
||||||
|
int PointerCache::Ncache = 32;
|
||||||
|
#else
|
||||||
|
int PointerCache::Ncache = 8;
|
||||||
|
#endif
|
||||||
int PointerCache::Victim;
|
int PointerCache::Victim;
|
||||||
int PointerCache::VictimSmall;
|
int PointerCache::VictimSmall;
|
||||||
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::NcacheMax];
|
PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::NcacheMax];
|
||||||
@ -16,12 +20,16 @@ PointerCache::PointerCacheEntry PointerCache::EntriesSmall[PointerCache::NcacheS
|
|||||||
void PointerCache::Init(void)
|
void PointerCache::Init(void)
|
||||||
{
|
{
|
||||||
char * str;
|
char * str;
|
||||||
|
|
||||||
str= getenv("GRID_ALLOC_NCACHE_LARGE");
|
str= getenv("GRID_ALLOC_NCACHE_LARGE");
|
||||||
if ( str ) Ncache = atoi(str);
|
if ( str ) Ncache = atoi(str);
|
||||||
if ( (Ncache<0) || (Ncache > NcacheMax)) Ncache = NcacheMax;
|
if ( (Ncache<0) || (Ncache > NcacheMax)) Ncache = NcacheMax;
|
||||||
|
|
||||||
str= getenv("GRID_ALLOC_NCACHE_SMALL");
|
str= getenv("GRID_ALLOC_NCACHE_SMALL");
|
||||||
if ( str ) NcacheSmall = atoi(str);
|
if ( str ) NcacheSmall = atoi(str);
|
||||||
if ( (NcacheSmall<0) || (NcacheSmall > NcacheSmallMax)) NcacheSmall = NcacheSmallMax;
|
if ( (NcacheSmall<0) || (NcacheSmall > NcacheSmallMax)) NcacheSmall = NcacheSmallMax;
|
||||||
|
|
||||||
|
// printf("Aligned allocator cache: large %d/%d small %d/%d\n",Ncache,NcacheMax,NcacheSmall,NcacheSmallMax);
|
||||||
}
|
}
|
||||||
void *PointerCache::Insert(void *ptr,size_t bytes)
|
void *PointerCache::Insert(void *ptr,size_t bytes)
|
||||||
{
|
{
|
||||||
|
@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){
|
|||||||
if (heap_bytes >= heap_size) {
|
if (heap_bytes >= heap_size) {
|
||||||
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||||
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||||
std::cout<< " Current value is " << (heap_size/(1024*1024)) <<std::endl;
|
std::cout<< " Current alloc is " << (bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
std::cout<< " Current bytes is " << (heap_bytes/(1024*1024)) <<"MB"<<std::endl;
|
||||||
|
std::cout<< " Current heap is " << (heap_size/(1024*1024)) <<"MB"<<std::endl;
|
||||||
assert(heap_bytes<heap_size);
|
assert(heap_bytes<heap_size);
|
||||||
}
|
}
|
||||||
//std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;
|
//std::cerr << "ShmBufferMalloc "<<std::hex<< ptr<<" - "<<((uint64_t)ptr+bytes)<<std::dec<<std::endl;
|
||||||
|
@ -47,7 +47,7 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
const int Ls=12;
|
const int Ls=12;
|
||||||
std::vector< std::vector<int> > latts;
|
std::vector< std::vector<int> > latts;
|
||||||
#if 0
|
#if 1
|
||||||
latts.push_back(std::vector<int> ({24,24,24,24}) );
|
latts.push_back(std::vector<int> ({24,24,24,24}) );
|
||||||
latts.push_back(std::vector<int> ({48,24,24,24}) );
|
latts.push_back(std::vector<int> ({48,24,24,24}) );
|
||||||
latts.push_back(std::vector<int> ({96,24,24,24}) );
|
latts.push_back(std::vector<int> ({96,24,24,24}) );
|
||||||
@ -157,7 +157,7 @@ void benchDw(std::vector<int> & latt4, int Ls)
|
|||||||
std::cout <<"\t"<<flops/(t1-t0)<<"\t"<<(t1-t0)/1000./1000.<<" s\t";
|
std::cout <<"\t"<<flops/(t1-t0)<<"\t"<<(t1-t0)/1000./1000.<<" s\t";
|
||||||
|
|
||||||
// Cheby uses MpcDagMpc so 2x flops
|
// Cheby uses MpcDagMpc so 2x flops
|
||||||
for(int i=0;i<100;i++){
|
for(int i=0;i<1;i++){
|
||||||
Cheby(Mpc,src_o,r_o);
|
Cheby(Mpc,src_o,r_o);
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
Cheby(Mpc,src_o,r_o);
|
Cheby(Mpc,src_o,r_o);
|
||||||
|
Loading…
Reference in New Issue
Block a user