1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-05 03:35:55 +01:00

round-up after testing of prefetches in stencil close

This commit is contained in:
nmeyer-ur 2020-06-03 11:58:20 +02:00
parent 5050833b42
commit 5ee3ea2144
4 changed files with 41 additions and 10 deletions

View File

@ -164,12 +164,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
MULT_2SPIN_1(Dir); \
PREFETCH_CHIMU(base); \
/* PREFETCH_GAUGE_L1(NxtDir); */ \
MULT_2SPIN_2; \
if (s == 0) { \
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
} \
RECON; \
nmu++; \
}
@ -180,12 +175,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
if((!local)&&(!st.same_node[Dir]) ) { \
LOAD_CHI(base); \
MULT_2SPIN_1(Dir); \
PREFETCH_CHIMU(base); \
/* PREFETCH_GAUGE_L1(NxtDir); */ \
MULT_2SPIN_2; \
if (s == 0) { \
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
} \
RECON; \
nmu++; \
}

View File

@ -445,18 +445,21 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); /* printf("."); */ return;}
//if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
#endif
} else if( interior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); /* printf("-"); */ return;}
//if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
#endif
} else if( exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
#ifndef GRID_NVCC
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); /* printf("+"); */ return;}
//if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
#endif
}
assert(0 && " Kernel optimisation case not covered ");

View File

@ -799,6 +799,25 @@ typedef veci SIMD_Itype; // Integer type
// prefetch utilities
// No-op prefetch stub: takes a byte count and an address and deliberately
// does nothing with either of them (the body is empty).
inline void v_prefetch0(int size, const char *ptr) {
  (void)size;  // intentionally unused
  (void)ptr;   // intentionally unused
}
/* PF 256 worse than PF 64
inline void prefetch_HINT_T0(const char *ptr){
static int64_t last_ptr;
int64_t vptr = reinterpret_cast<std::intptr_t>(ptr) & 0x7fffffffffffff00ll;
if (last_ptr != vptr) {
last_ptr = vptr;
pred pg1 = Optimization::acle<double>::pg1();
svprfd(pg1, reinterpret_cast<int64_t*>(ptr), SV_PLDL1STRM);
svprfd(pg1, ptr, SV_PLDL1STRM);
}
};
*/
/* beneficial for operators?
inline void prefetch_HINT_T0(const char *ptr){
pred pg1 = Optimization::acle<double>::pg1();
svprfd(pg1, ptr, SV_PLDL1STRM);
};
*/
// Active definition of prefetch_HINT_T0: an empty body, so no prefetch
// instruction is issued for the supplied address.
inline void prefetch_HINT_T0(const char *ptr) {
  (void)ptr;  // intentionally unused
}
// Function name aliases

View File

@ -68,8 +68,27 @@ void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice
int num=table.size();
std::pair<int,int> *table_v = & table[0];
auto rhs_v = rhs.View();
// main loop
accelerator_forNB( i,num, vobj::Nsimd(), {
typedef decltype(coalescedRead(buffer[0])) compressed_t;
// prefetching:
// +1% performance for Wilson on 32**4
// -2% performance for DW on 24**4 x 12
/*
const int dist = 2;
if (i+dist < num){
svbool_t pg1 = svptrue_b64();
// prefetch input
auto in = rhs_v(so+table_v[i+dist].second);
svprfd(pg1, (char*)&in, SV_PLDL1STRM);
// prefetch store buffer
uint64_t o = table_v[i+dist].first;
svprfd(pg1, (char*)&buffer[off+o], SV_PSTL1STRM);
}
*/
compressed_t tmp_c;
uint64_t o = table_v[i].first;
compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second));