diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h index c0b0646d..584ed1d5 100644 --- a/Grid/algorithms/approx/Chebyshev.h +++ b/Grid/algorithms/approx/Chebyshev.h @@ -236,7 +236,6 @@ public: int vol=grid->gSites(); typedef typename Field::vector_type vector_type; - constexpr int Nsimd = vector_type::Nsimd(); Field T0(grid); T0 = in; Field T1(grid); @@ -264,6 +263,7 @@ public: auto Tn_v = Tn->View(); auto Tnp_v = Tnp->View(); auto Tnm_v = Tnm->View(); + constexpr int Nsimd = vector_type::Nsimd(); accelerator_forNB(ss, in.Grid()->oSites(), Nsimd, { coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss)); diff --git a/Grid/allocator/AlignedAllocator.cc b/Grid/allocator/AlignedAllocator.cc index 976dfbdc..ef6459ed 100644 --- a/Grid/allocator/AlignedAllocator.cc +++ b/Grid/allocator/AlignedAllocator.cc @@ -7,7 +7,11 @@ MemoryStats *MemoryProfiler::stats = nullptr; bool MemoryProfiler::debug = false; int PointerCache::NcacheSmall = PointerCache::NcacheSmallMax; -int PointerCache::Ncache = PointerCache::NcacheMax; +#ifdef GRID_CUDA +int PointerCache::Ncache = 32; +#else +int PointerCache::Ncache = 8; +#endif int PointerCache::Victim; int PointerCache::VictimSmall; PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::NcacheMax]; @@ -16,12 +20,16 @@ PointerCache::PointerCacheEntry PointerCache::EntriesSmall[PointerCache::NcacheS void PointerCache::Init(void) { char * str; + str= getenv("GRID_ALLOC_NCACHE_LARGE"); if ( str ) Ncache = atoi(str); if ( (Ncache<0) || (Ncache > NcacheMax)) Ncache = NcacheMax; + str= getenv("GRID_ALLOC_NCACHE_SMALL"); if ( str ) NcacheSmall = atoi(str); if ( (NcacheSmall<0) || (NcacheSmall > NcacheSmallMax)) NcacheSmall = NcacheSmallMax; + + // printf("Aligned alloocator cache: large %d/%d small %d/%d\n",Ncache,NcacheMax,NcacheSmall,NcacheSmallMax); } void *PointerCache::Insert(void *ptr,size_t bytes) { diff --git a/Grid/communicator/SharedMemory.cc b/Grid/communicator/SharedMemory.cc index 5bca9764..de10da3d 100644 --- a/Grid/communicator/SharedMemory.cc +++ b/Grid/communicator/SharedMemory.cc @@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){ if (heap_bytes >= heap_size) { std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < > latts; -#if 0 +#if 1 latts.push_back(std::vector ({24,24,24,24}) ); latts.push_back(std::vector ({48,24,24,24}) ); latts.push_back(std::vector ({96,24,24,24}) ); @@ -157,7 +157,7 @@ void benchDw(std::vector & latt4, int Ls) std::cout <<"\t"<