/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/AlignedAllocator.h

    Copyright (C) 2015

Author: Azusa Yamaguchi
Author: Peter Boyle

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_ALIGNED_ALLOCATOR_H
#define GRID_ALIGNED_ALLOCATOR_H

// NOTE(review): an earlier copy of this header had every "<...>" span removed
// (include targets, template parameter lists, and the code between a '<' and
// the following '>' inside both allocate() functions).  Those spans have been
// restored below; the places where the restored text could not be derived from
// the surviving code are individually marked NOTE(review).  Please diff against
// the canonical upstream copy before relying on them.

// Standard headers for the names used directly in this file.
#include <algorithm>  // std::max
#include <cassert>    // assert (SHMEM consistency check)
#include <cstddef>    // std::size_t, std::ptrdiff_t
#include <cstdint>    // uint8_t, uint64_t
#include <cstdio>     // std::printf, std::fflush (SHMEM consistency check)
#include <cstdlib>    // free, exit
#include <vector>     // std::vector (template aliases at the end of the file)

// Platform allocation headers.  The header names follow the Autoconf
// convention HAVE_<HEADER> for the macros that guard them.
#ifdef HAVE_MALLOC_MALLOC_H
#include <malloc/malloc.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif

#ifdef HAVE_MM_MALLOC_H
#include <mm_malloc.h>
#endif

// Alignment, in bytes, requested from _mm_malloc / memalign by both allocators.
// NOTE(review): the defining line was lost; 2MB is a reconstruction (it matches
// the huge-page check declared below) -- confirm.  The #ifndef lets a build
// override it.
#ifndef GRID_ALLOC_ALIGN
#define GRID_ALLOC_ALIGN (2*1024*1024)
#endif

namespace Grid {

  ////////////////////////////////////////////////////////////////////
  // Small fixed-size cache of released pointers.
  //
  // Only the declarations live here; the definitions are in another
  // translation unit.  As used in this file:
  //   - alignedAllocator::deallocate passes the released block to Insert()
  //     and frees whatever non-null pointer Insert() hands back;
  //   - alignedAllocator::allocate asks Lookup() for a block of the
  //     requested byte count before going to the system allocator.
  ////////////////////////////////////////////////////////////////////
  class PointerCache {
  private:

    static const int Ncache=8;   // number of cache slots
    static int victim;           // presumably the next slot to evict -- defined elsewhere

    typedef struct {
      void *address;             // cached pointer
      size_t bytes;              // size of the cached block in bytes
      int valid;                 // non-zero when the slot is occupied (presumed)
    } PointerCacheEntry;

    static PointerCacheEntry Entries[Ncache];

  public:

    // Offer a released block to the cache; a non-null return value is a
    // pointer the caller must free (see alignedAllocator::deallocate).
    static void *Insert(void *ptr,size_t bytes) ;
    // Ask for a cached block of `bytes` bytes; a null return is treated as
    // a miss by alignedAllocator::allocate.
    static void *Lookup(size_t bytes) ;

  };

  // Byte counters updated by the allocators below; all start at zero.
  struct MemoryStats
  {
    size_t totalAllocated{0}, maxAllocated{0},
           currentlyAllocated{0}, totalFreed{0};
  };

  // Holds the optional statistics sink.  The allocators update *stats only
  // when the pointer is non-null.
  class MemoryProfiler
  {
  public:
    static MemoryStats *stats;
  };

  // Declared here, defined elsewhere; not called from this header.
  void check_huge_pages(void *Buf,uint64_t BYTES);

////////////////////////////////////////////////////////////////////
// A lattice of something, but assume the something is SIMDized.
////////////////////////////////////////////////////////////////////

// Stateless allocator returning GRID_ALLOC_ALIGN-aligned storage, with
// recently released blocks recycled through PointerCache.
//
// construct()/destroy() are intentionally empty: containers using this
// allocator neither initialise nor destroy their elements through it.
template<typename _Tp>
class alignedAllocator {
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef _Tp*       pointer;
  typedef const _Tp* const_pointer;
  typedef _Tp&       reference;
  typedef const _Tp& const_reference;
  typedef _Tp        value_type;

  template<typename _Tp1>  struct rebind { typedef alignedAllocator<_Tp1> other; };

  alignedAllocator() noexcept { }
  alignedAllocator(const alignedAllocator&) noexcept { }
  template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) noexcept { }
  ~alignedAllocator() noexcept { }

  pointer address(reference __x) const { return &__x; }

  // Largest element count whose byte size still fits in size_t.
  size_type max_size() const noexcept { return size_t(-1) / sizeof(_Tp); }

  // Returns storage for __n elements (the hint _p is ignored).
  // The result may be null if the underlying allocation fails.
  pointer allocate(size_type __n, const void* _p= 0)
  {
    size_type bytes = __n*sizeof(_Tp);

    if (auto s = MemoryProfiler::stats)
    {
      s->totalAllocated     += bytes;
      s->currentlyAllocated += bytes;
      s->maxAllocated        = std::max(s->maxAllocated, s->currentlyAllocated);
    }

    // Try to recycle a previously released block of the same size.
    _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
    //    if ( ptr != NULL )
    //      std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr <<std::dec <<std::endl;

    // NOTE(review): from here to the end of allocate() the original text was
    // lost.  The allocate-on-miss calls mirror commAllocator::allocate below.
#ifdef HAVE_MM_MALLOC_H
    if ( ptr == nullptr ) ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
#else
    if ( ptr == nullptr ) ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
#endif

    // NOTE(review): first-touch loop reconstructed to match commAllocator;
    // confirm it belongs here.  One write per 4k page.
    uint8_t *cp = (uint8_t *)ptr;
    if ( ptr ) {
#pragma omp parallel for schedule(static)
      for(size_type n=0;n<bytes;n+=4096){
        cp[n]=0;
      }
    }
    return ptr;
  }

  // Releases storage for __n elements.  The block is offered to the
  // PointerCache; only the pointer the cache returns (if any) is freed.
  void deallocate(pointer __p, size_type __n)
  {
    size_type bytes = __n * sizeof(_Tp);

    if (auto s = MemoryProfiler::stats)
    {
      s->totalFreed         += bytes;
      s->currentlyAllocated -= bytes;
    }

    pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);

#ifdef HAVE_MM_MALLOC_H
    if ( __freeme ) _mm_free((void *)__freeme);
#else
    if ( __freeme ) free((void *)__freeme);
#endif
  }

  // Deliberate no-ops: no element is constructed or destroyed.
  // NOTE(review): this also means value-initialising container operations
  // (e.g. push_back, fill construction) do not store the value -- confirm
  // callers only ever assign into already-sized containers.
  void construct(pointer __p, const _Tp& __val) { }
  void construct(pointer __p) { }
  void destroy(pointer __p) { }
};

// The allocator is stateless, so any two instances compare equal.
template<typename _Tp>  inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
template<typename _Tp>  inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }

//////////////////////////////////////////////////////////////////////////////////////////
// MPI3 : comms must use shm region
// SHMEM: comms must use symmetric heap
//////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_COMMS_SHMEM
extern "C" {
// NOTE(review): the header name was lost; <mpp/shmem.h> is a reconstruction -- confirm.
#include <mpp/shmem.h>
extern void * shmem_align(size_t, size_t);
extern void  shmem_free(void *);
}
#define PARANOID_SYMMETRIC_HEAP
#endif

// Stateless allocator for communication buffers.
//   - With GRID_COMMS_SHMEM: storage comes from shmem_align / shmem_free.
//   - Otherwise: GRID_ALLOC_ALIGN-aligned storage from _mm_malloc / memalign,
//     with one byte per 4k page written after allocation.
// Unlike alignedAllocator, blocks are not recycled through PointerCache.
template<typename _Tp>
class commAllocator {
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef _Tp*       pointer;
  typedef const _Tp* const_pointer;
  typedef _Tp&       reference;
  typedef const _Tp& const_reference;
  typedef _Tp        value_type;

  template<typename _Tp1>  struct rebind { typedef commAllocator<_Tp1> other; };

  commAllocator() noexcept { }
  commAllocator(const commAllocator&) noexcept { }
  template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) noexcept { }
  ~commAllocator() noexcept { }

  pointer address(reference __x) const { return &__x; }

  // Largest element count whose byte size still fits in size_t.
  size_type max_size() const noexcept { return size_t(-1) / sizeof(_Tp); }

#ifdef GRID_COMMS_SHMEM
  // Returns storage for __n elements from shmem_align (the hint _p is ignored).
  pointer allocate(size_type __n, const void* _p= 0)
  {
    size_type bytes = __n*sizeof(_Tp);

    if (auto s = MemoryProfiler::stats)
    {
      s->totalAllocated     += bytes;
      s->currentlyAllocated += bytes;
      s->maxAllocated        = std::max(s->maxAllocated, s->currentlyAllocated);
    }

    // The two branches pass (size, alignment) in opposite orders.
#ifdef CRAY
    _Tp *ptr = (_Tp *) shmem_align(bytes,64);
#else
    _Tp *ptr = (_Tp *) shmem_align(64,bytes);
#endif

#ifdef PARANOID_SYMMETRIC_HEAP
    // Broadcast PE 0's pointer value and abort if this PE got a different one.
    static void * bcast;
    static long  psync[_SHMEM_REDUCE_SYNC_SIZE];

    bcast = (void *) ptr;
    shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync);

    if ( bcast != ptr ) {
      // %lx expects unsigned long, so the pointers are cast explicitly.
      std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),(unsigned long)bcast,(unsigned long)ptr);std::fflush(stdout);
      //      BACKTRACEFILE();
      exit(0);
    }
    assert( bcast == (void *) ptr);
#endif
    return ptr;
  }

  // Releases storage for __n elements with shmem_free.
  void deallocate(pointer __p, size_type __n)
  {
    size_type bytes = __n*sizeof(_Tp);

    if (auto s = MemoryProfiler::stats)
    {
      s->totalFreed         += bytes;
      s->currentlyAllocated -= bytes;
    }

    shmem_free((void *)__p);
  }
#else
  // Returns GRID_ALLOC_ALIGN-aligned storage for __n elements (the hint _p is
  // ignored).  The result may be null if the underlying allocation fails.
  pointer allocate(size_type __n, const void* _p= 0)
  {
    size_type bytes = __n*sizeof(_Tp);

    if (auto s = MemoryProfiler::stats)
    {
      s->totalAllocated     += bytes;
      s->currentlyAllocated += bytes;
      s->maxAllocated        = std::max(s->maxAllocated, s->currentlyAllocated);
    }

#ifdef HAVE_MM_MALLOC_H
    _Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN);
#else
    _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes);
#endif

    uint8_t *cp = (uint8_t *)ptr;
    if ( ptr ) {
      // One touch per 4k page, static OMP loop to catch same loop order
#pragma omp parallel for schedule(static)
      // NOTE(review): the loop body and the head of deallocate() were lost
      // after "n" and are reconstructed from the comment above and from the
      // SHMEM deallocate().
      for(size_type n=0;n<bytes;n+=4096){
        cp[n]=0;
      }
    }
    return ptr;
  }

  // Releases storage for __n elements with _mm_free / free.
  void deallocate(pointer __p, size_type __n)
  {
    size_type bytes = __n*sizeof(_Tp);

    if (auto s = MemoryProfiler::stats)
    {
      s->totalFreed         += bytes;
      s->currentlyAllocated -= bytes;
    }

#ifdef HAVE_MM_MALLOC_H
    _mm_free((void *)__p);
#else
    free((void *)__p);
#endif
  }
#endif

  // Deliberate no-ops: no element is constructed or destroyed.
  void construct(pointer __p, const _Tp& __val) { }
  void construct(pointer __p) { }
  void destroy(pointer __p) { }
};

// The allocator is stateless, so any two instances compare equal.
template<typename _Tp>  inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
template<typename _Tp>  inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }

////////////////////////////////////////////////////////////////////////////////
// Template typedefs
////////////////////////////////////////////////////////////////////////////////
template<class T> using Vector     = std::vector<T,alignedAllocator<T> >;
template<class T> using commVector = std::vector<T,commAllocator<T> >;
template<class T> using Matrix     = std::vector<std::vector<T,alignedAllocator<T> > >;

} // namespace Grid
#endif