/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/PerfCount.h Copyright (C) 2015 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Peter Boyle <peterboyle@MacBook-Pro.local> Author: paboyle <paboyle@ph.ed.ac.uk> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ #ifndef GRID_PERFCOUNT_H #define GRID_PERFCOUNT_H #include <sys/time.h> #include <ctime> #include <chrono> #include <string.h> #include <unistd.h> #include <sys/ioctl.h> #ifdef __linux__ #include <syscall.h> #include <linux/perf_event.h> #else #include <sys/syscall.h> #endif namespace Grid { #ifdef __linux__ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { int ret=0; ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); return ret; } #endif #ifdef __bgq__ inline uint64_t cyclecount(void){ uint64_t tmp; asm volatile ("mfspr %0,0x10C" : "=&r" (tmp) ); return tmp; } #elif defined __x86_64__ #include <immintrin.h> #ifndef __INTEL_COMPILER #include <x86intrin.h> #endif inline uint64_t cyclecount(void){ return __rdtsc(); } #else #warning No cycle counter implemented for this architecture inline uint64_t cyclecount(void){ return 0; } #endif class PerformanceCounter { private: typedef struct { public: uint32_t type; uint64_t config; const char *name; } PerformanceCounterConfig; static const PerformanceCounterConfig PerformanceCounterConfigs []; public: enum PerformanceCounterType { CPUCYCLES=0, INSTRUCTIONS, // STALL_CYCLES, CACHE_REFERENCES, CACHE_MISSES, L1D_READ_MISS, L1D_READ_ACCESS, L1D_WRITE_MISS, L1D_WRITE_ACCESS, L1D_PREFETCH_MISS, L1D_PREFETCH_ACCESS, LL_READ_MISS, // LL_READ_ACCESS, LL_WRITE_MISS, LL_WRITE_ACCESS, LL_PREFETCH_MISS, LL_PREFETCH_ACCESS, L1I_READ_MISS, L1I_READ_ACCESS, PERFORMANCE_COUNTER_NUM_TYPES }; public: int PCT; long long count; int fd; unsigned long long elapsed; uint64_t begin; static int NumTypes(void){ return PERFORMANCE_COUNTER_NUM_TYPES; } PerformanceCounter(int _pct) { #ifdef __linux__ assert(_pct>=0); assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES); fd=-1; count=0; PCT =_pct; Open(); #endif } void Open(void) { #ifdef __linux__ struct perf_event_attr pe; memset(&pe, 0, sizeof(struct perf_event_attr)); pe.size = sizeof(struct perf_event_attr); pe.disabled = 1; pe.exclude_kernel = 1; pe.exclude_hv = 1; pe.inherit = 1; pe.type = PerformanceCounterConfigs[PCT].type; pe.config= PerformanceCounterConfigs[PCT].config; const char * name = PerformanceCounterConfigs[PCT].name; fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1 if (fd == -1) { fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name); perror("Error is"); } #endif } void Start(void) { #ifdef __linux__ if ( fd!= -1) { ::ioctl(fd, PERF_EVENT_IOC_RESET, 0); ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); } begin =cyclecount(); #else begin = 0; #endif } void Stop(void) { count=0; #ifdef __linux__ if ( fd!= -1) { ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ::read(fd, &count, sizeof(long long)); } elapsed = cyclecount() - begin; #else elapsed = 0; #endif } void Report(void) { #ifdef __linux__ std::printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count); #else std::printf("%llu cycles \n", elapsed ); #endif } ~PerformanceCounter() { #ifdef __linux__ ::close(fd); #endif } }; } #endif