mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
KNL streaming stores, and KNL performance counters
This commit is contained in:
@ -416,6 +416,28 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||
out);
|
||||
}
|
||||
#ifdef AVX512
|
||||
} else if (stat.is_init() ) {
|
||||
|
||||
int nthreads;
|
||||
stat.start();
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp master
|
||||
nthreads = omp_get_num_threads();
|
||||
int mythread = omp_get_thread_num();
|
||||
stat.enter(mythread);
|
||||
#pragma omp for nowait
|
||||
for(int ss=0;ss<U._grid->oSites();ss++)
|
||||
{
|
||||
int sU=ss;
|
||||
int sF=LLs*sU;
|
||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
||||
}
|
||||
stat.exit(mythread);
|
||||
}
|
||||
stat.accum(nthreads);
|
||||
#endif
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
|
@ -31,6 +31,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#ifndef GRID_QCD_WILSON_FERMION_5D_H
|
||||
#define GRID_QCD_WILSON_FERMION_5D_H
|
||||
|
||||
#include <Grid/Stat.h>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
namespace QCD {
|
||||
@ -60,6 +62,7 @@ namespace Grid {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef WilsonKernels<Impl> Kernels;
|
||||
PmuStat stat;
|
||||
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
|
@ -134,7 +134,9 @@
|
||||
////////////////////////////////
|
||||
// Xm
|
||||
////////////////////////////////
|
||||
#ifndef STREAM_STORE
|
||||
basep= (uint64_t) &out._odata[ss];
|
||||
#endif
|
||||
// basep= st.GetPFInfo(nent,plocal); nent++;
|
||||
if ( local ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
@ -229,7 +231,9 @@
|
||||
LOAD_CHI(base);
|
||||
}
|
||||
base= (uint64_t) &out._odata[ss];
|
||||
#ifndef STREAM_STORE
|
||||
PREFETCH_CHIMU(base);
|
||||
#endif
|
||||
{
|
||||
MULT_2SPIN_DIR_PFTM(Tm,basep);
|
||||
}
|
||||
|
Reference in New Issue
Block a user