mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-13 12:47:05 +01:00
Refactoring header layout
This commit is contained in:
75
lib/perfmon/PerfCount.cc
Normal file
75
lib/perfmon/PerfCount.cc
Normal file
@ -0,0 +1,75 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/PerfCount.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
||||
#define RawConfig(A,B) (A<<8|B)
|
||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
||||
#ifdef __linux__
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES , "CACHE_REFERENCES..." , INSTRUCTIONS},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." , CACHE_REFERENCES},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS},
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." , CPUCYCLES },
|
||||
// 4
|
||||
#ifdef AVX512
|
||||
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS },
|
||||
{ PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS },
|
||||
// 11
|
||||
#else
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS....",INSTRUCTIONS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS) , "L1D_WRITE_MISS.....",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS) , "L1D_WRITE_ACCESS...",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS..",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
|
||||
// 11
|
||||
#endif
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS.......",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS.....",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS....",L1D_READ_ACCESS},
|
||||
//15
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS...",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......",INSTRUCTIONS},
|
||||
{ PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS....",INSTRUCTIONS}
|
||||
//19
|
||||
// { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
|
||||
#endif
|
||||
};
|
||||
}
|
244
lib/perfmon/PerfCount.h
Normal file
244
lib/perfmon/PerfCount.h
Normal file
@ -0,0 +1,244 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/PerfCount.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_PERFCOUNT_H
|
||||
#define GRID_PERFCOUNT_H
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <chrono>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <syscall.h>
|
||||
#include <linux/perf_event.h>
|
||||
#else
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
#ifdef __linux__
|
||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
||||
int cpu, int group_fd, unsigned long flags)
|
||||
{
|
||||
int ret=0;
|
||||
|
||||
ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
|
||||
group_fd, flags);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TIMERS_OFF
|
||||
|
||||
|
||||
inline uint64_t cyclecount(void){
|
||||
return 0;
|
||||
}
|
||||
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
|
||||
#define __SSC_STOP __SSC_MARK(0x110)
|
||||
#define __SSC_START __SSC_MARK(0x111)
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#define __SSC_MARK(mark)
|
||||
#define __SSC_STOP
|
||||
#define __SSC_START
|
||||
|
||||
/*
|
||||
* cycle counters arch dependent
|
||||
*/
|
||||
|
||||
#ifdef __bgq__
|
||||
inline uint64_t cyclecount(void){
|
||||
uint64_t tmp;
|
||||
asm volatile ("mfspr %0,0x10C" : "=&r" (tmp) );
|
||||
return tmp;
|
||||
}
|
||||
#elif defined __x86_64__
|
||||
inline uint64_t cyclecount(void){
|
||||
return __rdtsc();
|
||||
// unsigned int dummy;
|
||||
// return __rdtscp(&dummy);
|
||||
}
|
||||
#else
|
||||
|
||||
inline uint64_t cyclecount(void){
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
class PerformanceCounter {
|
||||
private:
|
||||
|
||||
typedef struct {
|
||||
public:
|
||||
uint32_t type;
|
||||
uint64_t config;
|
||||
const char *name;
|
||||
int normalisation;
|
||||
} PerformanceCounterConfig;
|
||||
|
||||
static const PerformanceCounterConfig PerformanceCounterConfigs [];
|
||||
|
||||
public:
|
||||
|
||||
enum PerformanceCounterType {
|
||||
CACHE_REFERENCES=0,
|
||||
CACHE_MISSES=1,
|
||||
CPUCYCLES=2,
|
||||
INSTRUCTIONS=3,
|
||||
L1D_READ_ACCESS=4,
|
||||
PERFORMANCE_COUNTER_NUM_TYPES=19
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
int PCT;
|
||||
|
||||
long long count;
|
||||
long long cycles;
|
||||
int fd;
|
||||
int cyclefd;
|
||||
unsigned long long elapsed;
|
||||
uint64_t begin;
|
||||
|
||||
static int NumTypes(void){
|
||||
return PERFORMANCE_COUNTER_NUM_TYPES;
|
||||
}
|
||||
|
||||
PerformanceCounter(int _pct) {
|
||||
#ifdef __linux__
|
||||
assert(_pct>=0);
|
||||
assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
|
||||
fd=-1;
|
||||
cyclefd=-1;
|
||||
count=0;
|
||||
cycles=0;
|
||||
PCT =_pct;
|
||||
Open();
|
||||
#endif
|
||||
}
|
||||
void Open(void)
|
||||
{
|
||||
#ifdef __linux__
|
||||
struct perf_event_attr pe;
|
||||
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||
pe.size = sizeof(struct perf_event_attr);
|
||||
|
||||
pe.disabled = 1;
|
||||
pe.exclude_kernel = 1;
|
||||
pe.exclude_hv = 1;
|
||||
pe.inherit = 1;
|
||||
|
||||
pe.type = PerformanceCounterConfigs[PCT].type;
|
||||
pe.config= PerformanceCounterConfigs[PCT].config;
|
||||
const char * name = PerformanceCounterConfigs[PCT].name;
|
||||
fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
|
||||
perror("Error is");
|
||||
}
|
||||
int norm = PerformanceCounterConfigs[PCT].normalisation;
|
||||
pe.type = PerformanceCounterConfigs[norm].type;
|
||||
pe.config= PerformanceCounterConfigs[norm].config;
|
||||
name = PerformanceCounterConfigs[norm].name;
|
||||
cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
|
||||
if (cyclefd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
|
||||
perror("Error is");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Start(void)
|
||||
{
|
||||
#ifdef __linux__
|
||||
if ( fd!= -1) {
|
||||
::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
|
||||
::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0);
|
||||
}
|
||||
begin =cyclecount();
|
||||
#else
|
||||
begin = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Stop(void) {
|
||||
count=0;
|
||||
cycles=0;
|
||||
size_t ign;
|
||||
#ifdef __linux__
|
||||
if ( fd!= -1) {
|
||||
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
|
||||
ign=::read(fd, &count, sizeof(long long));
|
||||
ign=::read(cyclefd, &cycles, sizeof(long long));
|
||||
}
|
||||
elapsed = cyclecount() - begin;
|
||||
#else
|
||||
elapsed = 0;
|
||||
#endif
|
||||
|
||||
}
|
||||
void Report(void) {
|
||||
#ifdef __linux__
|
||||
int N = PerformanceCounterConfigs[PCT].normalisation;
|
||||
const char * sn = PerformanceCounterConfigs[N].name ;
|
||||
const char * sc = PerformanceCounterConfigs[PCT].name;
|
||||
std::printf("tsc = %llu %s = %llu %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles,
|
||||
sc, count, sc,sn, (double)count/(double)cycles);
|
||||
#else
|
||||
std::printf("%llu cycles \n", elapsed );
|
||||
#endif
|
||||
}
|
||||
|
||||
~PerformanceCounter()
|
||||
{
|
||||
#ifdef __linux__
|
||||
::close(fd); ::close(cyclefd);
|
||||
#endif
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
247
lib/perfmon/Stat.cc
Normal file
247
lib/perfmon/Stat.cc
Normal file
@ -0,0 +1,247 @@
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/perfmon/Stat.h>
|
||||
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
bool PmuStat::pmu_initialized=false;
|
||||
|
||||
|
||||
void PmuStat::init(const char *regname)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
name = regname;
|
||||
if (!pmu_initialized)
|
||||
{
|
||||
std::cout<<"initialising pmu"<<std::endl;
|
||||
pmu_initialized = true;
|
||||
pmu_init();
|
||||
}
|
||||
clear();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::clear(void)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
count = 0;
|
||||
tregion = 0;
|
||||
pmc0 = 0;
|
||||
pmc1 = 0;
|
||||
inst = 0;
|
||||
cyc = 0;
|
||||
ref = 0;
|
||||
tcycles = 0;
|
||||
reads = 0;
|
||||
writes = 0;
|
||||
#endif
|
||||
}
|
||||
void PmuStat::print(void)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
std::cout <<"Reg "<<std::string(name)<<":\n";
|
||||
std::cout <<" region "<<tregion<<std::endl;
|
||||
std::cout <<" cycles "<<tcycles<<std::endl;
|
||||
std::cout <<" inst "<<inst <<std::endl;
|
||||
std::cout <<" cyc "<<cyc <<std::endl;
|
||||
std::cout <<" ref "<<ref <<std::endl;
|
||||
std::cout <<" pmc0 "<<pmc0 <<std::endl;
|
||||
std::cout <<" pmc1 "<<pmc1 <<std::endl;
|
||||
std::cout <<" count "<<count <<std::endl;
|
||||
std::cout <<" reads "<<reads <<std::endl;
|
||||
std::cout <<" writes "<<writes <<std::endl;
|
||||
#endif
|
||||
}
|
||||
void PmuStat::start(void)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
pmu_start();
|
||||
++count;
|
||||
xmemctrs(&mrstart, &mwstart);
|
||||
tstart = __rdtsc();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::enter(int t)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
counters[0][t] = __rdpmc(0);
|
||||
counters[1][t] = __rdpmc(1);
|
||||
counters[2][t] = __rdpmc((1<<30)|0);
|
||||
counters[3][t] = __rdpmc((1<<30)|1);
|
||||
counters[4][t] = __rdpmc((1<<30)|2);
|
||||
counters[5][t] = __rdtsc();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::exit(int t)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
counters[0][t] = __rdpmc(0) - counters[0][t];
|
||||
counters[1][t] = __rdpmc(1) - counters[1][t];
|
||||
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
|
||||
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
|
||||
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
|
||||
counters[5][t] = __rdtsc() - counters[5][t];
|
||||
#endif
|
||||
}
|
||||
void PmuStat::accum(int nthreads)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
tend = __rdtsc();
|
||||
xmemctrs(&mrend, &mwend);
|
||||
pmu_stop();
|
||||
for (int t = 0; t < nthreads; ++t) {
|
||||
pmc0 += counters[0][t];
|
||||
pmc1 += counters[1][t];
|
||||
inst += counters[2][t];
|
||||
cyc += counters[3][t];
|
||||
ref += counters[4][t];
|
||||
tcycles += counters[5][t];
|
||||
}
|
||||
uint64_t region = tend - tstart;
|
||||
tregion += region;
|
||||
uint64_t mreads = mrend - mrstart;
|
||||
reads += mreads;
|
||||
uint64_t mwrites = mwend - mwstart;
|
||||
writes += mwrites;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void PmuStat::pmu_fini(void) {}
|
||||
void PmuStat::pmu_start(void) {};
|
||||
void PmuStat::pmu_stop(void) {};
|
||||
void PmuStat::pmu_init(void)
|
||||
{
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
KNLsetup();
|
||||
#endif
|
||||
}
|
||||
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
|
||||
{
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
ctrs c;
|
||||
KNLreadctrs(c);
|
||||
uint64_t emr = 0, emw = 0;
|
||||
for (int i = 0; i < NEDC; ++i)
|
||||
{
|
||||
emr += c.edcrd[i];
|
||||
emw += c.edcwr[i];
|
||||
}
|
||||
*mr = emr;
|
||||
*mw = emw;
|
||||
#else
|
||||
*mr = *mw = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
|
||||
struct knl_gbl_ PmuStat::gbl;
|
||||
|
||||
#define PMU_MEM
|
||||
|
||||
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
|
||||
{
|
||||
char fname[1024];
|
||||
snprintf(fname, sizeof(fname), "%s/type", ename);
|
||||
FILE *fp = fopen(fname, "r");
|
||||
if (fp == 0) {
|
||||
::printf("open %s", fname);
|
||||
::exit(0);
|
||||
}
|
||||
int type;
|
||||
int ret = fscanf(fp, "%d", &type);
|
||||
assert(ret == 1);
|
||||
fclose(fp);
|
||||
// std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
|
||||
|
||||
struct perf_event_attr hw = {};
|
||||
hw.size = sizeof(hw);
|
||||
hw.type = type;
|
||||
// see /sys/devices/uncore_*/format/*
|
||||
// All of the events we are interested in are configured the same way, but
|
||||
// that isn't always true. Proper code would parse the format files
|
||||
hw.config = event | (umask << 8);
|
||||
//hw.read_format = PERF_FORMAT_GROUP;
|
||||
// unfortunately the above only works within a single PMU; might
|
||||
// as well just read them one at a time
|
||||
int cpu = 0;
|
||||
fd = perf_event_open(&hw, -1, cpu, -1, 0);
|
||||
if (fd == -1) {
|
||||
::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
|
||||
::exit(0);
|
||||
} else {
|
||||
// std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PmuStat::KNLsetup(void){
|
||||
|
||||
int ret;
|
||||
char fname[1024];
|
||||
|
||||
// MC RPQ inserts and WPQ inserts (reads & writes)
|
||||
for (int mc = 0; mc < NMC; ++mc)
|
||||
{
|
||||
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
|
||||
// RPQ Inserts
|
||||
KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
|
||||
// WPQ Inserts
|
||||
KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
|
||||
}
|
||||
// EDC RPQ inserts and WPQ inserts
|
||||
for (int edc=0; edc < NEDC; ++edc)
|
||||
{
|
||||
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
|
||||
// RPQ inserts
|
||||
KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
|
||||
// WPQ inserts
|
||||
KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
|
||||
}
|
||||
// EDC HitE, HitM, MissE, MissM
|
||||
for (int edc=0; edc < NEDC; ++edc)
|
||||
{
|
||||
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
|
||||
KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
|
||||
KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
|
||||
KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
|
||||
KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t PmuStat::KNLreadctr(int fd)
|
||||
{
|
||||
uint64_t data;
|
||||
size_t s = ::read(fd, &data, sizeof(data));
|
||||
if (s != sizeof(uint64_t)){
|
||||
::printf("read counter %lu", s);
|
||||
::exit(0);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
void PmuStat::KNLreadctrs(ctrs &c)
|
||||
{
|
||||
for (int i = 0; i < NMC; ++i)
|
||||
{
|
||||
c.mcrd[i] = KNLreadctr(gbl.mc_rd[i]);
|
||||
c.mcwr[i] = KNLreadctr(gbl.mc_wr[i]);
|
||||
}
|
||||
for (int i = 0; i < NEDC; ++i)
|
||||
{
|
||||
c.edcrd[i] = KNLreadctr(gbl.edc_rd[i]);
|
||||
c.edcwr[i] = KNLreadctr(gbl.edc_wr[i]);
|
||||
}
|
||||
for (int i = 0; i < NEDC; ++i)
|
||||
{
|
||||
c.edchite[i] = KNLreadctr(gbl.edc_hite[i]);
|
||||
c.edchitm[i] = KNLreadctr(gbl.edc_hitm[i]);
|
||||
c.edcmisse[i] = KNLreadctr(gbl.edc_misse[i]);
|
||||
c.edcmissm[i] = KNLreadctr(gbl.edc_missm[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
104
lib/perfmon/Stat.h
Normal file
104
lib/perfmon/Stat.h
Normal file
@ -0,0 +1,104 @@
|
||||
#ifndef _GRID_STAT_H
|
||||
#define _GRID_STAT_H
|
||||
|
||||
#ifdef AVX512
|
||||
#define _KNIGHTS_LANDING_ROOTONLY
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Extra KNL counters from MCDRAM
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
#define NMC 6
|
||||
#define NEDC 8
|
||||
struct ctrs
|
||||
{
|
||||
uint64_t mcrd[NMC];
|
||||
uint64_t mcwr[NMC];
|
||||
uint64_t edcrd[NEDC];
|
||||
uint64_t edcwr[NEDC];
|
||||
uint64_t edchite[NEDC];
|
||||
uint64_t edchitm[NEDC];
|
||||
uint64_t edcmisse[NEDC];
|
||||
uint64_t edcmissm[NEDC];
|
||||
};
|
||||
// Peter/Azusa:
|
||||
// Our modification of a code provided by Larry Meadows from Intel
|
||||
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
|
||||
// so is already public and in the linux kernel for KNL.
|
||||
struct knl_gbl_
|
||||
{
|
||||
int mc_rd[NMC];
|
||||
int mc_wr[NMC];
|
||||
int edc_rd[NEDC];
|
||||
int edc_wr[NEDC];
|
||||
int edc_hite[NEDC];
|
||||
int edc_hitm[NEDC];
|
||||
int edc_misse[NEDC];
|
||||
int edc_missm[NEDC];
|
||||
};
|
||||
#endif
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class PmuStat
|
||||
{
|
||||
uint64_t counters[8][256];
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
static struct knl_gbl_ gbl;
|
||||
#endif
|
||||
const char *name;
|
||||
|
||||
uint64_t reads; // memory reads
|
||||
uint64_t writes; // memory writes
|
||||
uint64_t mrstart; // memory read counter at start of parallel region
|
||||
uint64_t mrend; // memory read counter at end of parallel region
|
||||
uint64_t mwstart; // memory write counter at start of parallel region
|
||||
uint64_t mwend; // memory write counter at end of parallel region
|
||||
|
||||
// cumulative counters
|
||||
uint64_t count; // number of invocations
|
||||
uint64_t tregion; // total time in parallel region (from thread 0)
|
||||
uint64_t tcycles; // total cycles inside parallel region
|
||||
uint64_t inst, ref, cyc; // fixed counters
|
||||
uint64_t pmc0, pmc1;// pmu
|
||||
// add memory counters here
|
||||
// temp variables
|
||||
uint64_t tstart; // tsc at start of parallel region
|
||||
uint64_t tend; // tsc at end of parallel region
|
||||
// map for ctrs values
|
||||
// 0 pmc0 start
|
||||
// 1 pmc0 end
|
||||
// 2 pmc1 start
|
||||
// 3 pmc1 end
|
||||
// 4 tsc start
|
||||
// 5 tsc end
|
||||
static bool pmu_initialized;
|
||||
public:
|
||||
static bool is_init(void){ return pmu_initialized;}
|
||||
static void pmu_init(void);
|
||||
static void pmu_fini(void);
|
||||
static void pmu_start(void);
|
||||
static void pmu_stop(void);
|
||||
void accum(int nthreads);
|
||||
static void xmemctrs(uint64_t *mr, uint64_t *mw);
|
||||
void start(void);
|
||||
void enter(int t);
|
||||
void exit(int t);
|
||||
void print(void);
|
||||
void init(const char *regname);
|
||||
void clear(void);
|
||||
#ifdef _KNIGHTS_LANDING_
|
||||
static void KNLsetup(void);
|
||||
static uint64_t KNLreadctr(int fd);
|
||||
static void KNLreadctrs(ctrs &c);
|
||||
static void KNLevsetup(const char *ename, int &fd, int event, int umask);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
|
102
lib/perfmon/Timer.h
Normal file
102
lib/perfmon/Timer.h
Normal file
@ -0,0 +1,102 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/Timer.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#ifndef GRID_TIME_H
|
||||
#define GRID_TIME_H
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <chrono>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
|
||||
// Dress the output; use std::chrono
|
||||
|
||||
// C++11 time facilities better?
|
||||
inline double usecond(void) {
|
||||
struct timeval tv;
|
||||
#ifdef TIMERS_ON
|
||||
gettimeofday(&tv,NULL);
|
||||
#endif
|
||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
||||
}
|
||||
|
||||
typedef std::chrono::system_clock GridClock;
|
||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||
typedef std::chrono::milliseconds GridTime;
|
||||
typedef std::chrono::microseconds GridUsecs;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
|
||||
{
|
||||
stream << time.count()<<" ms";
|
||||
return stream;
|
||||
}
|
||||
|
||||
class GridStopWatch {
|
||||
private:
|
||||
bool running;
|
||||
GridTimePoint start;
|
||||
GridUsecs accumulator;
|
||||
public:
|
||||
GridStopWatch () {
|
||||
Reset();
|
||||
}
|
||||
void Start(void) {
|
||||
assert(running == false);
|
||||
#ifdef TIMERS_ON
|
||||
start = GridClock::now();
|
||||
#endif
|
||||
running = true;
|
||||
}
|
||||
void Stop(void) {
|
||||
assert(running == true);
|
||||
#ifdef TIMERS_ON
|
||||
accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start);
|
||||
#endif
|
||||
running = false;
|
||||
};
|
||||
void Reset(void){
|
||||
running = false;
|
||||
#ifdef TIMERS_ON
|
||||
start = GridClock::now();
|
||||
#endif
|
||||
accumulator = std::chrono::duration_cast<GridUsecs>(start-start);
|
||||
}
|
||||
GridTime Elapsed(void) {
|
||||
assert(running == false);
|
||||
return std::chrono::duration_cast<GridTime>( accumulator );
|
||||
}
|
||||
uint64_t useconds(void){
|
||||
assert(running == false);
|
||||
return (uint64_t) accumulator.count();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
Reference in New Issue
Block a user