mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Refactoring header layout
This commit is contained in:
		
							
								
								
									
										75
									
								
								lib/perfmon/PerfCount.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								lib/perfmon/PerfCount.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,75 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/perfmon/PerfCount.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
#include <Grid/perfmon/PerfCount.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
 | 
			
		||||
#define RawConfig(A,B) (A<<8|B)
 | 
			
		||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES    ,  "CACHE_REFERENCES..." , INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES        ,  "CACHE_MISSES......." , CACHE_REFERENCES},
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES          ,  "CPUCYCLES.........." , INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS        ,  "INSTRUCTIONS......." , CPUCYCLES   },
 | 
			
		||||
    // 4
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES    },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS  },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS    },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS  },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS  },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS },
 | 
			
		||||
    { PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS },
 | 
			
		||||
    // 11
 | 
			
		||||
#else
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS)     ,  "L1D_READ_ACCESS....",INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS)       ,  "L1D_READ_MISS......",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,MISS)      ,  "L1D_WRITE_MISS.....",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,WRITE,ACCESS)    ,  "L1D_WRITE_ACCESS...",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS)   ,  "L1D_PREFETCH_MISS..",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) ,  "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS},
 | 
			
		||||
    // 11
 | 
			
		||||
#endif
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS)        ,  "LL_READ_MISS.......",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS)      ,  "LL_READ_ACCESS.....",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS)       ,  "LL_WRITE_MISS......",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS)     ,  "LL_WRITE_ACCESS....",L1D_READ_ACCESS},
 | 
			
		||||
    //15
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS)    ,  "LL_PREFETCH_MISS...",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS)  ,  "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS)       ,  "L1I_READ_MISS......",INSTRUCTIONS},
 | 
			
		||||
  { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS)     ,  "L1I_READ_ACCESS....",INSTRUCTIONS}
 | 
			
		||||
    //19
 | 
			
		||||
  //  { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" },
 | 
			
		||||
#endif
 | 
			
		||||
};
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										244
									
								
								lib/perfmon/PerfCount.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										244
									
								
								lib/perfmon/PerfCount.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,244 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/perfmon/PerfCount.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <peterboyle@MacBook-Pro.local>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_PERFCOUNT_H
 | 
			
		||||
#define GRID_PERFCOUNT_H
 | 
			
		||||
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#include <ctime>
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/ioctl.h>
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
#include <syscall.h>
 | 
			
		||||
#include <linux/perf_event.h>
 | 
			
		||||
#else
 | 
			
		||||
#include <sys/syscall.h>
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
#include <x86intrin.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
 | 
			
		||||
			    int cpu, int group_fd, unsigned long flags)
 | 
			
		||||
{
 | 
			
		||||
  int ret=0;
 | 
			
		||||
 | 
			
		||||
  ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
 | 
			
		||||
		group_fd, flags);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(TIMERS_OFF)

// TIMERS_OFF build: the cycle counter is a stub that always reads zero.
inline uint64_t cyclecount(void){
  return 0;
}
// In this branch the marker macros expand to inline assembly: the marker value
// is moved into %ebx and followed by the byte sequence 0x64 0x67 0x90.
#define __SSC_MARK(mark) __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(mark):"%ebx")
#define __SSC_STOP  __SSC_MARK(0x110)
#define __SSC_START __SSC_MARK(0x111)

#else

// Default build: the marker macros expand to nothing.
#define __SSC_MARK(mark) 
#define __SSC_STOP  
#define __SSC_START 

/*
 * Architecture dependent cycle counters
 */

#if defined(__bgq__)
// BlueGene/Q: read special purpose register 0x10C.
inline uint64_t cyclecount(void){
   uint64_t spr_value;
   asm volatile ("mfspr %0,0x10C" : "=&r" (spr_value)  );
   return spr_value;
}
#elif defined(__x86_64__)
// x86-64: read the time stamp counter.
inline uint64_t cyclecount(void){
  const uint64_t tsc = __rdtsc();
  return tsc;
  //  unsigned int dummy;
  // return __rdtscp(&dummy);
}
#else

// Any other architecture: no counter available, report zero.
inline uint64_t cyclecount(void){
   return 0;
}

#endif

#endif
 | 
			
		||||
 | 
			
		||||
class PerformanceCounter {
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  typedef struct { 
 | 
			
		||||
  public:
 | 
			
		||||
    uint32_t type;
 | 
			
		||||
    uint64_t config;
 | 
			
		||||
    const char *name;
 | 
			
		||||
    int normalisation;
 | 
			
		||||
  } PerformanceCounterConfig; 
 | 
			
		||||
  
 | 
			
		||||
  static const PerformanceCounterConfig PerformanceCounterConfigs [];
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  enum PerformanceCounterType {
 | 
			
		||||
    CACHE_REFERENCES=0,
 | 
			
		||||
    CACHE_MISSES=1,
 | 
			
		||||
    CPUCYCLES=2,
 | 
			
		||||
    INSTRUCTIONS=3,
 | 
			
		||||
    L1D_READ_ACCESS=4,
 | 
			
		||||
    PERFORMANCE_COUNTER_NUM_TYPES=19
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    
 | 
			
		||||
  int PCT;
 | 
			
		||||
 | 
			
		||||
  long long count;
 | 
			
		||||
  long long cycles;
 | 
			
		||||
  int fd;
 | 
			
		||||
  int cyclefd;
 | 
			
		||||
  unsigned long long elapsed;
 | 
			
		||||
  uint64_t begin;
 | 
			
		||||
 | 
			
		||||
  static int NumTypes(void){ 
 | 
			
		||||
    return PERFORMANCE_COUNTER_NUM_TYPES;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  PerformanceCounter(int _pct) {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    assert(_pct>=0);
 | 
			
		||||
    assert(_pct<PERFORMANCE_COUNTER_NUM_TYPES);
 | 
			
		||||
    fd=-1;
 | 
			
		||||
    cyclefd=-1;
 | 
			
		||||
    count=0;
 | 
			
		||||
    cycles=0;
 | 
			
		||||
    PCT =_pct;
 | 
			
		||||
    Open();
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
  void Open(void) 
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    struct perf_event_attr pe;
 | 
			
		||||
    memset(&pe, 0, sizeof(struct perf_event_attr));
 | 
			
		||||
    pe.size = sizeof(struct perf_event_attr);
 | 
			
		||||
 | 
			
		||||
    pe.disabled = 1;
 | 
			
		||||
    pe.exclude_kernel = 1;
 | 
			
		||||
    pe.exclude_hv = 1;
 | 
			
		||||
    pe.inherit    = 1;
 | 
			
		||||
 | 
			
		||||
    pe.type  = PerformanceCounterConfigs[PCT].type;
 | 
			
		||||
    pe.config= PerformanceCounterConfigs[PCT].config;
 | 
			
		||||
    const char * name = PerformanceCounterConfigs[PCT].name;
 | 
			
		||||
    fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 | 
			
		||||
    if (fd == -1) {
 | 
			
		||||
      fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
 | 
			
		||||
      perror("Error is");
 | 
			
		||||
    }
 | 
			
		||||
    int norm = PerformanceCounterConfigs[PCT].normalisation;
 | 
			
		||||
    pe.type  = PerformanceCounterConfigs[norm].type;
 | 
			
		||||
    pe.config= PerformanceCounterConfigs[norm].config;
 | 
			
		||||
    name = PerformanceCounterConfigs[norm].name;
 | 
			
		||||
    cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
 | 
			
		||||
    if (cyclefd == -1) {
 | 
			
		||||
      fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
 | 
			
		||||
      perror("Error is");
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void Start(void)
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    if ( fd!= -1) {
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
      ::ioctl(cyclefd, PERF_EVENT_IOC_RESET, 0);
 | 
			
		||||
      ::ioctl(cyclefd, PERF_EVENT_IOC_ENABLE, 0);
 | 
			
		||||
    }
 | 
			
		||||
    begin  =cyclecount();
 | 
			
		||||
#else
 | 
			
		||||
    begin = 0;
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void Stop(void) {
 | 
			
		||||
    count=0;
 | 
			
		||||
    cycles=0;
 | 
			
		||||
    size_t ign;
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    if ( fd!= -1) {
 | 
			
		||||
      ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
 | 
			
		||||
      ign=::read(fd, &count, sizeof(long long));
 | 
			
		||||
      ign=::read(cyclefd, &cycles, sizeof(long long));
 | 
			
		||||
    }
 | 
			
		||||
    elapsed = cyclecount() - begin;
 | 
			
		||||
#else
 | 
			
		||||
    elapsed = 0;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  void Report(void) {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    int N = PerformanceCounterConfigs[PCT].normalisation;
 | 
			
		||||
    const char * sn = PerformanceCounterConfigs[N].name ;
 | 
			
		||||
    const char * sc = PerformanceCounterConfigs[PCT].name;
 | 
			
		||||
      std::printf("tsc = %llu %s = %llu  %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles, 
 | 
			
		||||
		  sc, count, sc,sn, (double)count/(double)cycles);
 | 
			
		||||
#else
 | 
			
		||||
    std::printf("%llu cycles \n", elapsed );
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ~PerformanceCounter()
 | 
			
		||||
  {
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
    ::close(fd);    ::close(cyclefd);
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										247
									
								
								lib/perfmon/Stat.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										247
									
								
								lib/perfmon/Stat.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,247 @@
 | 
			
		||||
#include <Grid/GridCore.h>
 | 
			
		||||
#include <Grid/perfmon/PerfCount.h>
 | 
			
		||||
#include <Grid/perfmon/Stat.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bool PmuStat::pmu_initialized=false;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void PmuStat::init(const char *regname)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  name = regname;
 | 
			
		||||
  if (!pmu_initialized)
 | 
			
		||||
    {
 | 
			
		||||
      std::cout<<"initialising pmu"<<std::endl;
 | 
			
		||||
      pmu_initialized = true;
 | 
			
		||||
      pmu_init();
 | 
			
		||||
    }
 | 
			
		||||
  clear();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::clear(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  count = 0;
 | 
			
		||||
  tregion = 0;
 | 
			
		||||
  pmc0 = 0;
 | 
			
		||||
  pmc1 = 0;
 | 
			
		||||
  inst = 0;
 | 
			
		||||
  cyc = 0;
 | 
			
		||||
  ref = 0;
 | 
			
		||||
  tcycles = 0;
 | 
			
		||||
  reads = 0;
 | 
			
		||||
  writes = 0;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::print(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  std::cout <<"Reg "<<std::string(name)<<":\n";
 | 
			
		||||
  std::cout <<"  region "<<tregion<<std::endl;
 | 
			
		||||
  std::cout <<"  cycles "<<tcycles<<std::endl;
 | 
			
		||||
  std::cout <<"  inst   "<<inst   <<std::endl;
 | 
			
		||||
  std::cout <<"  cyc    "<<cyc    <<std::endl;
 | 
			
		||||
  std::cout <<"  ref    "<<ref    <<std::endl;
 | 
			
		||||
  std::cout <<"  pmc0   "<<pmc0   <<std::endl;
 | 
			
		||||
  std::cout <<"  pmc1   "<<pmc1   <<std::endl;
 | 
			
		||||
  std::cout <<"  count  "<<count  <<std::endl;
 | 
			
		||||
  std::cout <<"  reads  "<<reads  <<std::endl;
 | 
			
		||||
  std::cout <<"  writes "<<writes <<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::start(void)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  pmu_start();
 | 
			
		||||
  ++count;
 | 
			
		||||
  xmemctrs(&mrstart, &mwstart);
 | 
			
		||||
  tstart = __rdtsc();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::enter(int t)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  counters[0][t] = __rdpmc(0);
 | 
			
		||||
  counters[1][t] = __rdpmc(1);
 | 
			
		||||
  counters[2][t] = __rdpmc((1<<30)|0);
 | 
			
		||||
  counters[3][t] = __rdpmc((1<<30)|1);
 | 
			
		||||
  counters[4][t] = __rdpmc((1<<30)|2);
 | 
			
		||||
  counters[5][t] = __rdtsc();
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::exit(int t)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  counters[0][t] = __rdpmc(0) - counters[0][t];
 | 
			
		||||
  counters[1][t] = __rdpmc(1) - counters[1][t];
 | 
			
		||||
  counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
 | 
			
		||||
  counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
 | 
			
		||||
  counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
 | 
			
		||||
  counters[5][t] = __rdtsc() - counters[5][t];
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
void PmuStat::accum(int nthreads)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __x86_64__
 | 
			
		||||
  tend = __rdtsc();
 | 
			
		||||
  xmemctrs(&mrend, &mwend);
 | 
			
		||||
  pmu_stop();
 | 
			
		||||
  for (int t = 0; t < nthreads; ++t) {
 | 
			
		||||
    pmc0 += counters[0][t];
 | 
			
		||||
    pmc1 += counters[1][t];
 | 
			
		||||
    inst += counters[2][t];
 | 
			
		||||
    cyc += counters[3][t];
 | 
			
		||||
    ref += counters[4][t];
 | 
			
		||||
    tcycles += counters[5][t];
 | 
			
		||||
  }
 | 
			
		||||
  uint64_t region = tend - tstart;
 | 
			
		||||
  tregion += region;
 | 
			
		||||
  uint64_t mreads = mrend - mrstart;
 | 
			
		||||
  reads += mreads;
 | 
			
		||||
  uint64_t mwrites = mwend - mwstart;
 | 
			
		||||
  writes += mwrites;
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// The next three hooks are intentionally empty.
void PmuStat::pmu_fini(void)
{
}

void PmuStat::pmu_start(void)
{
}

void PmuStat::pmu_stop(void)
{
}

// One-off setup hook; runs KNLsetup() only when _KNIGHTS_LANDING_ is defined.
void PmuStat::pmu_init(void)
{
#ifdef _KNIGHTS_LANDING_
  KNLsetup();
#endif
}
 | 
			
		||||
// Store the current memory read and write totals in *mr and *mw.
// With _KNIGHTS_LANDING_ defined the totals are the sums of the edcrd and
// edcwr entries returned by KNLreadctrs(); otherwise both are set to zero.
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
{
#ifdef _KNIGHTS_LANDING_
  ctrs snapshot;
  KNLreadctrs(snapshot);

  uint64_t read_total  = 0;
  uint64_t write_total = 0;
  for (int edc = 0; edc < NEDC; ++edc) {
    read_total  += snapshot.edcrd[edc];
    write_total += snapshot.edcwr[edc];
  }
  *mr = read_total;
  *mw = write_total;
#else
  *mw = 0;
  *mr = 0;
#endif
}
 | 
			
		||||
 | 
			
		||||
#ifdef _KNIGHTS_LANDING_
 | 
			
		||||
 | 
			
		||||
struct knl_gbl_ PmuStat::gbl;
 | 
			
		||||
 | 
			
		||||
#define PMU_MEM
 | 
			
		||||
 | 
			
		||||
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
 | 
			
		||||
{
 | 
			
		||||
  char fname[1024];
 | 
			
		||||
  snprintf(fname, sizeof(fname), "%s/type", ename);
 | 
			
		||||
  FILE *fp = fopen(fname, "r");
 | 
			
		||||
  if (fp == 0) {
 | 
			
		||||
    ::printf("open %s", fname);
 | 
			
		||||
    ::exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  int type;
 | 
			
		||||
  int ret = fscanf(fp, "%d", &type);
 | 
			
		||||
  assert(ret == 1);
 | 
			
		||||
  fclose(fp);
 | 
			
		||||
  //  std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
 | 
			
		||||
 | 
			
		||||
  struct perf_event_attr hw = {};
 | 
			
		||||
  hw.size = sizeof(hw);
 | 
			
		||||
  hw.type = type;
 | 
			
		||||
  // see /sys/devices/uncore_*/format/*
 | 
			
		||||
  // All of the events we are interested in are configured the same way, but
 | 
			
		||||
  // that isn't always true. Proper code would parse the format files
 | 
			
		||||
  hw.config = event | (umask << 8);
 | 
			
		||||
  //hw.read_format = PERF_FORMAT_GROUP;
 | 
			
		||||
  // unfortunately the above only works within a single PMU; might
 | 
			
		||||
  // as well just read them one at a time
 | 
			
		||||
  int cpu = 0;
 | 
			
		||||
  fd = perf_event_open(&hw, -1, cpu, -1, 0);
 | 
			
		||||
  if (fd == -1) {
 | 
			
		||||
    ::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
 | 
			
		||||
    ::exit(0);
 | 
			
		||||
  } else { 
 | 
			
		||||
    //    std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 void PmuStat::KNLsetup(void){
 | 
			
		||||
 | 
			
		||||
   int ret;
 | 
			
		||||
   char fname[1024];
 | 
			
		||||
 | 
			
		||||
   // MC RPQ inserts and WPQ inserts (reads & writes)
 | 
			
		||||
   for (int mc = 0; mc < NMC; ++mc)
 | 
			
		||||
     {
 | 
			
		||||
       ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
 | 
			
		||||
       // RPQ Inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
 | 
			
		||||
       // WPQ Inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
 | 
			
		||||
     }
 | 
			
		||||
   // EDC RPQ inserts and WPQ inserts
 | 
			
		||||
   for (int edc=0; edc < NEDC; ++edc)
 | 
			
		||||
     {
 | 
			
		||||
       ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
 | 
			
		||||
       // RPQ inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
 | 
			
		||||
       // WPQ inserts
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
 | 
			
		||||
     }
 | 
			
		||||
   // EDC HitE, HitM, MissE, MissM
 | 
			
		||||
   for (int edc=0; edc < NEDC; ++edc)
 | 
			
		||||
     {
 | 
			
		||||
       ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
 | 
			
		||||
       KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
 | 
			
		||||
     }
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
// Read one 64-bit counter value from descriptor fd and return it.
// If read() does not deliver exactly sizeof(uint64_t) bytes (including the
// error return -1) a message is printed and the process terminates with a
// non-zero exit status.
uint64_t PmuStat::KNLreadctr(int fd)
{
  uint64_t data = 0;
  // read() returns a signed count; keep it signed so -1 is not mistaken for a size.
  const ssize_t got = ::read(fd, &data, sizeof(data));
  if (got != static_cast<ssize_t>(sizeof(data))) {
    // The cast keeps the printed text identical to the earlier size_t version.
    ::printf("read counter %lu", static_cast<unsigned long>(got));
    ::exit(1);
  }
  return data;
}
 | 
			
		||||
 | 
			
		||||
// Fill c with the current value of every counter opened by KNLsetup().
// The descriptors are read in the same order as they were opened: the MC
// read/write pairs, then the EDC read/write pairs, then the four EDC
// hit/miss counters of each EDC.
void PmuStat::KNLreadctrs(ctrs &c)
{
  for (int mc = 0; mc < NMC; ++mc) {
    c.mcrd[mc] = KNLreadctr(gbl.mc_rd[mc]);
    c.mcwr[mc] = KNLreadctr(gbl.mc_wr[mc]);
  }

  for (int edc = 0; edc < NEDC; ++edc) {
    c.edcrd[edc] = KNLreadctr(gbl.edc_rd[edc]);
    c.edcwr[edc] = KNLreadctr(gbl.edc_wr[edc]);
  }

  for (int edc = 0; edc < NEDC; ++edc) {
    c.edchite[edc]  = KNLreadctr(gbl.edc_hite[edc]);
    c.edchitm[edc]  = KNLreadctr(gbl.edc_hitm[edc]);
    c.edcmisse[edc] = KNLreadctr(gbl.edc_misse[edc]);
    c.edcmissm[edc] = KNLreadctr(gbl.edc_missm[edc]);
  }
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										104
									
								
								lib/perfmon/Stat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								lib/perfmon/Stat.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,104 @@
 | 
			
		||||
#ifndef _GRID_STAT_H
 | 
			
		||||
#define _GRID_STAT_H
 | 
			
		||||
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
#define _KNIGHTS_LANDING_ROOTONLY
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Extra KNL counters from MCDRAM
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#ifdef _KNIGHTS_LANDING_
 | 
			
		||||
// Array sizes for the two groups of counters below (mc* and edc*).
#define NMC 6
#define NEDC 8

// One snapshot of every counter value, one array slot per device.
struct ctrs
{
    // mc read / write counters
    uint64_t mcrd[NMC];
    uint64_t mcwr[NMC];
    // edc read / write counters
    uint64_t edcrd[NEDC]; 
    uint64_t edcwr[NEDC];
    // edc hit / miss counters
    uint64_t edchite[NEDC];
    uint64_t edchitm[NEDC];
    uint64_t edcmisse[NEDC];
    uint64_t edcmissm[NEDC];
};
 | 
			
		||||
// Peter/Azusa:
 | 
			
		||||
// Our modification of a code provided by Larry Meadows from Intel
 | 
			
		||||
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
 | 
			
		||||
// so is already public and in the linux kernel for KNL.
 | 
			
		||||
struct knl_gbl_
 | 
			
		||||
{
 | 
			
		||||
  int mc_rd[NMC];
 | 
			
		||||
  int mc_wr[NMC];
 | 
			
		||||
  int edc_rd[NEDC];
 | 
			
		||||
  int edc_wr[NEDC];
 | 
			
		||||
  int edc_hite[NEDC];
 | 
			
		||||
  int edc_hitm[NEDC];
 | 
			
		||||
  int edc_misse[NEDC];
 | 
			
		||||
  int edc_missm[NEDC];
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// Accumulates counter statistics for one named code region.
// Intended call sequence, as implemented in Stat.cc: init() once, then per
// invocation start(), enter(t)/exit(t) with a thread slot t, and accum(n) to
// fold the first n thread slots into the cumulative totals; print() reports.
class PmuStat
{
    // Dimensions of the per-thread scratch table.
    enum { NumCounterRows = 8, MaxThreadSlots = 256 };

    // counters[row][t], row meaning as used by enter()/exit()/accum():
    //   0 pmc0    1 pmc1    2 inst    3 cyc    4 ref    5 tsc
    // enter() stores the starting value, exit() replaces it with the delta.
    // Rows 6 and 7 are not referenced by the visible code.
    uint64_t counters[NumCounterRows][MaxThreadSlots];
#ifdef _KNIGHTS_LANDING_
    static struct knl_gbl_ gbl;
#endif
    const char *name;   // region name passed to init(); the pointer is stored, not copied

    uint64_t reads;     // memory reads
    uint64_t writes;    // memory writes
    uint64_t mrstart;   // memory read counter at start of parallel region
    uint64_t mrend;     // memory read counter at end of parallel region
    uint64_t mwstart;   // memory write counter at start of parallel region
    uint64_t mwend;     // memory write counter at end of parallel region

    // cumulative counters
    uint64_t count;     // number of invocations
    uint64_t tregion;   // total time in parallel region (from thread 0)
    uint64_t tcycles;   // total cycles inside parallel region
    uint64_t inst, ref, cyc;   // fixed counters
    uint64_t pmc0, pmc1;// pmu
    // add memory counters here
    // temp variables
    uint64_t tstart;    // tsc at start of parallel region
    uint64_t tend;      // tsc at end of parallel region
    static bool pmu_initialized;
public:
    static bool is_init(void){ return pmu_initialized;}
    static void pmu_init(void);
    static void pmu_fini(void);
    static void pmu_start(void);
    static void pmu_stop(void);
    void accum(int nthreads);
    static void xmemctrs(uint64_t *mr, uint64_t *mw);
    void start(void);
    void enter(int t);
    void exit(int t);
    void print(void);
    void init(const char *regname);
    void clear(void);
#ifdef _KNIGHTS_LANDING_
    static void     KNLsetup(void);
    static uint64_t KNLreadctr(int fd);
    static void     KNLreadctrs(ctrs &c);
    static void     KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif
    
  };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										102
									
								
								lib/perfmon/Timer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								lib/perfmon/Timer.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,102 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./lib/perfmon/Timer.h
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#ifndef GRID_TIME_H
 | 
			
		||||
#define GRID_TIME_H
 | 
			
		||||
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#include <ctime>
 | 
			
		||||
#include <chrono>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Dress the output; use std::chrono
 | 
			
		||||
 | 
			
		||||
// C++11 time facilities better?
 | 
			
		||||
// Wall-clock time in microseconds, taken from gettimeofday() when TIMERS_ON is
// defined. Without TIMERS_ON no clock is read and the function returns 0.
inline double usecond(void) {
  struct timeval tv;
  // Start from zero so the result is well defined when TIMERS_ON is not
  // defined; previously the struct was read uninitialised in that case.
  tv.tv_sec  = 0;
  tv.tv_usec = 0;
#ifdef TIMERS_ON
  gettimeofday(&tv,NULL);
#endif
  return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
 | 
			
		||||
 | 
			
		||||
// Clock and duration aliases used by the timing helpers in this header.
using GridClock     = std::chrono::system_clock;
using GridTimePoint = std::chrono::time_point<GridClock>;
using GridTime      = std::chrono::milliseconds;
using GridUsecs     = std::chrono::microseconds;
 | 
			
		||||
 | 
			
		||||
// Stream a millisecond duration as "<count> ms".
inline std::ostream& operator<< (std::ostream & stream, const std::chrono::milliseconds & time)
{
  return stream << time.count() << " ms";
}
 | 
			
		||||
 
 | 
			
		||||
class GridStopWatch {
 | 
			
		||||
private:
 | 
			
		||||
  bool running;
 | 
			
		||||
  GridTimePoint start;
 | 
			
		||||
  GridUsecs accumulator;
 | 
			
		||||
public:
 | 
			
		||||
  GridStopWatch () { 
 | 
			
		||||
    Reset();
 | 
			
		||||
  }
 | 
			
		||||
  void     Start(void) { 
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
#ifdef TIMERS_ON
 | 
			
		||||
    start = GridClock::now(); 
 | 
			
		||||
#endif
 | 
			
		||||
    running = true;
 | 
			
		||||
  }
 | 
			
		||||
  void     Stop(void)  { 
 | 
			
		||||
    assert(running == true);
 | 
			
		||||
#ifdef TIMERS_ON
 | 
			
		||||
    accumulator+= std::chrono::duration_cast<GridUsecs>(GridClock::now()-start); 
 | 
			
		||||
#endif
 | 
			
		||||
    running = false; 
 | 
			
		||||
  };
 | 
			
		||||
  void     Reset(void){
 | 
			
		||||
    running = false;
 | 
			
		||||
#ifdef TIMERS_ON
 | 
			
		||||
    start = GridClock::now();
 | 
			
		||||
#endif
 | 
			
		||||
    accumulator = std::chrono::duration_cast<GridUsecs>(start-start); 
 | 
			
		||||
  }
 | 
			
		||||
  GridTime Elapsed(void) {
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return std::chrono::duration_cast<GridTime>( accumulator );
 | 
			
		||||
  }
 | 
			
		||||
  uint64_t useconds(void){
 | 
			
		||||
    assert(running == false);
 | 
			
		||||
    return (uint64_t) accumulator.count();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
		Reference in New Issue
	
	Block a user