1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00
This commit is contained in:
paboyle 2018-01-13 00:19:19 +00:00
parent 8cb7a1a887
commit f4272aa6fd
2 changed files with 87 additions and 85 deletions

View File

@ -1,7 +1,7 @@
#include <Grid/GridCore.h> #include <Grid/GridCore.h>
#include <fcntl.h> #include <fcntl.h>
namespace Grid { NAMESPACE_BEGIN(Grid);
MemoryStats *MemoryProfiler::stats = nullptr; MemoryStats *MemoryProfiler::stats = nullptr;
bool MemoryProfiler::debug = false; bool MemoryProfiler::debug = false;
@ -49,7 +49,7 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {
void *PointerCache::Lookup(size_t bytes) { void *PointerCache::Lookup(size_t bytes) {
if (bytes < 4096 ) return NULL; if (bytes < 4096 ) return NULL;
#ifdef _OPENMP #ifdef _OPENMP
assert(omp_in_parallel()==0); assert(omp_in_parallel()==0);
@ -90,7 +90,7 @@ void check_huge_pages(void *Buf,uint64_t BYTES)
++n4ktotal; ++n4ktotal;
if (pageaddr != baseaddr + j * page_size) if (pageaddr != baseaddr + j * page_size)
++nnothuge; ++nnothuge;
} }
} }
int rank = CartesianCommunicator::RankWorld(); int rank = CartesianCommunicator::RankWorld();
printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge);
@ -106,20 +106,21 @@ std::string sizeString(const size_t bytes)
double count = bytes; double count = bytes;
while (count >= 1024 && s < 7) while (count >= 1024 && s < 7)
{ {
s++; s++;
count /= 1024; count /= 1024;
} }
if (count - floor(count) == 0.0) if (count - floor(count) == 0.0)
{ {
snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]); snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
} }
else else
{ {
snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]); snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
} }
return std::string(buf); return std::string(buf);
} }
} NAMESPACE_END(Grid);

View File

@ -24,8 +24,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef GRID_ALIGNED_ALLOCATOR_H #ifndef GRID_ALIGNED_ALLOCATOR_H
#define GRID_ALIGNED_ALLOCATOR_H #define GRID_ALIGNED_ALLOCATOR_H
@ -40,89 +40,89 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <mm_malloc.h> #include <mm_malloc.h>
#endif #endif
namespace Grid { NAMESPACE_BEGIN(Grid);
class PointerCache { class PointerCache {
private: private:
static const int Ncache=8; static const int Ncache=8;
static int victim; static int victim;
typedef struct { typedef struct {
void *address; void *address;
size_t bytes; size_t bytes;
int valid; int valid;
} PointerCacheEntry; } PointerCacheEntry;
static PointerCacheEntry Entries[Ncache]; static PointerCacheEntry Entries[Ncache];
public: public:
static void *Insert(void *ptr,size_t bytes) ; static void *Insert(void *ptr,size_t bytes) ;
static void *Lookup(size_t bytes) ; static void *Lookup(size_t bytes) ;
}; };
std::string sizeString(size_t bytes); std::string sizeString(size_t bytes);
struct MemoryStats struct MemoryStats
{ {
size_t totalAllocated{0}, maxAllocated{0}, size_t totalAllocated{0}, maxAllocated{0},
currentlyAllocated{0}, totalFreed{0}; currentlyAllocated{0}, totalFreed{0};
}; };
class MemoryProfiler class MemoryProfiler
{ {
public: public:
static MemoryStats *stats; static MemoryStats *stats;
static bool debug; static bool debug;
}; };
#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" #define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")"
#define profilerDebugPrint \ #define profilerDebugPrint \
if (MemoryProfiler::stats)\ if (MemoryProfiler::stats) \
{\ { \
auto s = MemoryProfiler::stats;\ auto s = MemoryProfiler::stats; \
std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\ std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \
std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \
<< std::endl;\ << std::endl; \
std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \
<< std::endl;\ << std::endl; \
std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \
<< std::endl;\ << std::endl; \
std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \
<< std::endl;\ << std::endl; \
} }
#define profilerAllocate(bytes)\ #define profilerAllocate(bytes) \
if (MemoryProfiler::stats)\ if (MemoryProfiler::stats) \
{\ { \
auto s = MemoryProfiler::stats;\ auto s = MemoryProfiler::stats; \
s->totalAllocated += (bytes);\ s->totalAllocated += (bytes); \
s->currentlyAllocated += (bytes);\ s->currentlyAllocated += (bytes); \
s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated);\ s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \
}\ } \
if (MemoryProfiler::debug)\ if (MemoryProfiler::debug) \
{\ { \
std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl;\ std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \
profilerDebugPrint;\ profilerDebugPrint; \
} }
#define profilerFree(bytes)\ #define profilerFree(bytes) \
if (MemoryProfiler::stats)\ if (MemoryProfiler::stats) \
{\ { \
auto s = MemoryProfiler::stats;\ auto s = MemoryProfiler::stats; \
s->totalFreed += (bytes);\ s->totalFreed += (bytes); \
s->currentlyAllocated -= (bytes);\ s->currentlyAllocated -= (bytes); \
}\ } \
if (MemoryProfiler::debug)\ if (MemoryProfiler::debug) \
{\ { \
std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl;\ std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \
profilerDebugPrint;\ profilerDebugPrint; \
} }
void check_huge_pages(void *Buf,uint64_t BYTES); void check_huge_pages(void *Buf,uint64_t BYTES);
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// A lattice of something, but assume the something is SIMDized. // A lattice of something, but assume the something is SIMDized.
@ -159,7 +159,7 @@ public:
////////////////// //////////////////
// Hack 2MB align; could make option probably doesn't need configurability // Hack 2MB align; could make option probably doesn't need configurability
////////////////// //////////////////
//define GRID_ALLOC_ALIGN (128) //define GRID_ALLOC_ALIGN (128)
#define GRID_ALLOC_ALIGN (2*1024*1024) #define GRID_ALLOC_ALIGN (2*1024*1024)
#ifdef HAVE_MM_MALLOC_H #ifdef HAVE_MM_MALLOC_H
if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN); if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,GRID_ALLOC_ALIGN);
@ -205,8 +205,8 @@ template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, con
#ifdef GRID_COMMS_SHMEM #ifdef GRID_COMMS_SHMEM
extern "C" { extern "C" {
#include <mpp/shmem.h> #include <mpp/shmem.h>
extern void * shmem_align(size_t, size_t); extern void * shmem_align(size_t, size_t);
extern void shmem_free(void *); extern void shmem_free(void *);
} }
#define PARANOID_SYMMETRIC_HEAP #define PARANOID_SYMMETRIC_HEAP
#endif #endif
@ -276,7 +276,7 @@ public:
#endif #endif
uint8_t *cp = (uint8_t *)ptr; uint8_t *cp = (uint8_t *)ptr;
if ( ptr ) { if ( ptr ) {
// One touch per 4k page, static OMP loop to catch same loop order // One touch per 4k page, static OMP loop to catch same loop order
#pragma omp parallel for schedule(static) #pragma omp parallel for schedule(static)
for(size_type n=0;n<bytes;n+=4096){ for(size_type n=0;n<bytes;n+=4096){
cp[n]=0; cp[n]=0;
@ -309,5 +309,6 @@ template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
template<class T> using commVector = std::vector<T,commAllocator<T> >; template<class T> using commVector = std::vector<T,commAllocator<T> >;
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
}; // namespace Grid NAMESPACE_END(Grid);
#endif #endif