1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 22:07:05 +01:00

First attempt to write the C terms in the clover derivative. Some shifts still need to be fixed.

This commit is contained in:
David Preti
2017-10-05 10:13:53 +02:00
parent 56478d63a5
commit d810e8c8fb
3 changed files with 77 additions and 31 deletions

View File

@ -90,9 +90,10 @@ inline uint64_t cyclecount(void){
}
#elif defined __x86_64__
// Cycle-counter accessor for the x86_64 branch of the surrounding #if/#elif chain.
//
// NOTE(review): this text comes from a diff listing whose +/- markers are not
// shown. The live `return __rdtsc();` and its commented-out copy directly below
// look like the removed and added versions of the same line, and the trailing
// `return 0;` looks like a temporary stub added by the change (its own comment
// asks for it to be removed) -- confirm against the repository.
// Read literally, as listed here, the first return statement wins and the
// `return 0;` after it is never reached.
inline uint64_t cyclecount(void){
return __rdtsc();
//return __rdtsc();
// Commented-out alternative that would read the counter through __rdtscp instead.
// unsigned int dummy;
// return __rdtscp(&dummy);
return 0; // <- remove this;
}
#else

View File

@ -57,35 +57,37 @@ void PmuStat::start(void)
pmu_start();
++count;
xmemctrs(&mrstart, &mwstart);
tstart = __rdtsc();
//tstart = __rdtsc();
tstart=0;
#endif
}
// Records a starting sample in counters[0..5][t] for slot `t`.
// The body is compiled only when __x86_64__ is defined; otherwise the function
// does nothing.
//
// NOTE(review): this text comes from a diff listing whose +/- markers are not
// shown. The two groups of six assignments below look like the removed version
// (hardware reads) followed by the added version (zero stubs) -- confirm against
// the repository. Read literally, as listed here, every slot is first loaded
// from a counter and then immediately overwritten with 0.
void PmuStat::enter(int t)
{
#ifdef __x86_64__
// First group: sample __rdpmc / __rdtsc into each slot.
// Presumably the (1<<30) bit selects a different counter bank than the plain
// indices 0 and 1 -- TODO confirm against the RDPMC documentation.
counters[0][t] = __rdpmc(0);
counters[1][t] = __rdpmc(1);
counters[2][t] = __rdpmc((1<<30)|0);
counters[3][t] = __rdpmc((1<<30)|1);
counters[4][t] = __rdpmc((1<<30)|2);
counters[5][t] = __rdtsc();
// Second group: the same slots set to 0, with the hardware reads commented out.
counters[0][t] = 0;//__rdpmc(0);
counters[1][t] = 0;//__rdpmc(1);
counters[2][t] = 0;//__rdpmc((1<<30)|0);
counters[3][t] = 0;//__rdpmc((1<<30)|1);
counters[4][t] = 0;//__rdpmc((1<<30)|2);
counters[5][t] = 0;//__rdtsc();
#endif
}
// Converts the value stored in counters[0..5][t] into an elapsed count for
// slot `t` by subtracting it from a fresh counter read.
// The body is compiled only when __x86_64__ is defined; otherwise the function
// does nothing.
//
// NOTE(review): this text comes from a diff listing whose +/- markers are not
// shown. The two groups of six assignments below look like the removed version
// (hardware deltas) followed by the added version (zero stubs) -- confirm
// against the repository. Read literally, as listed here, every delta is
// computed and then immediately overwritten with 0.
void PmuStat::exit(int t)
{
#ifdef __x86_64__
// First group: current reading minus the value already held in the slot.
counters[0][t] = __rdpmc(0) - counters[0][t];
counters[1][t] = __rdpmc(1) - counters[1][t];
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = __rdtsc() - counters[5][t];
// Second group: the same slots set to 0, with the delta computation commented out.
counters[0][t] = 0;//__rdpmc(0) - counters[0][t];
counters[1][t] = 0;// __rdpmc(1) - counters[1][t];
counters[2][t] = 0;// __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = 0;// __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = 0;// __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = 0;// __rdtsc() - counters[5][t];
#endif
}
void PmuStat::accum(int nthreads)
{
#ifdef __x86_64__
tend = __rdtsc();
// tend = __rdtsc();
tend =0 ;
xmemctrs(&mrend, &mwend);
pmu_stop();
for (int t = 0; t < nthreads; ++t) {