mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-05 03:35:55 +01:00
round-up after testing of prefetches in stencil close
This commit is contained in:
parent
5050833b42
commit
5ee3ea2144
@ -164,12 +164,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
||||
if((!local)&&(!st.same_node[Dir]) ) { \
|
||||
LOAD_CHI(base); \
|
||||
MULT_2SPIN_1(Dir); \
|
||||
PREFETCH_CHIMU(base); \
|
||||
/* PREFETCH_GAUGE_L1(NxtDir); */ \
|
||||
MULT_2SPIN_2; \
|
||||
if (s == 0) { \
|
||||
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
|
||||
} \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
@ -180,12 +175,7 @@ Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
|
||||
if((!local)&&(!st.same_node[Dir]) ) { \
|
||||
LOAD_CHI(base); \
|
||||
MULT_2SPIN_1(Dir); \
|
||||
PREFETCH_CHIMU(base); \
|
||||
/* PREFETCH_GAUGE_L1(NxtDir); */ \
|
||||
MULT_2SPIN_2; \
|
||||
if (s == 0) { \
|
||||
if ((Dir == 0) || (Dir == 4)) { PREFETCH_GAUGE_L2(Dir); } \
|
||||
} \
|
||||
RECON; \
|
||||
nmu++; \
|
||||
}
|
||||
|
@ -445,18 +445,21 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
#ifndef GRID_NVCC
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); /* printf("."); */ return;}
|
||||
//if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;}
|
||||
#endif
|
||||
} else if( interior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;}
|
||||
#ifndef GRID_NVCC
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); /* printf("-"); */ return;}
|
||||
//if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
||||
#ifndef GRID_NVCC
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); /* printf("+"); */ return;}
|
||||
//if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;}
|
||||
#endif
|
||||
}
|
||||
assert(0 && " Kernel optimisation case not covered ");
|
||||
|
@ -799,6 +799,25 @@ typedef veci SIMD_Itype; // Integer type
|
||||
|
||||
// prefetch utilities
|
||||
// No-op prefetch stub: accepts a byte count and an address and deliberately
// does nothing with either.
inline void v_prefetch0(int size, const char *ptr)
{
  (void)size;  // intentionally unused
  (void)ptr;   // intentionally unused
}
|
||||
|
||||
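/* PF 256 worse than PF 64: this disabled variant masks off the low 8 bits of the address (256-byte granularity) and skips the prefetch when the masked address equals the one from the previous call; it performed worse than the "PF 64" variant.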
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
static int64_t last_ptr;
|
||||
int64_t vptr = reinterpret_cast<std::intptr_t>(ptr) & 0x7fffffffffffff00ll;
|
||||
if (last_ptr != vptr) {
|
||||
last_ptr = vptr;
|
||||
pred pg1 = Optimization::acle<double>::pg1();
|
||||
svprfd(pg1, reinterpret_cast<int64_t*>(ptr), SV_PLDL1STRM);
|
||||
svprfd(pg1, ptr, SV_PLDL1STRM);
|
||||
}
|
||||
};
|
||||
*/
|
||||
/* beneficial for operators?
|
||||
inline void prefetch_HINT_T0(const char *ptr){
|
||||
pred pg1 = Optimization::acle<double>::pg1();
|
||||
svprfd(pg1, ptr, SV_PLDL1STRM);
|
||||
};
|
||||
*/
|
||||
// Active definition of prefetch_HINT_T0: an empty stub that ignores its
// argument and issues no prefetch instruction.
inline void prefetch_HINT_T0(const char *ptr)
{
  (void)ptr;  // intentionally unused
}
|
||||
|
||||
// Function name aliases
|
||||
|
@ -68,8 +68,27 @@ void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice
|
||||
int num=table.size();
|
||||
std::pair<int,int> *table_v = & table[0];
|
||||
auto rhs_v = rhs.View();
|
||||
|
||||
// main loop
|
||||
accelerator_forNB( i,num, vobj::Nsimd(), {
|
||||
typedef decltype(coalescedRead(buffer[0])) compressed_t;
|
||||
// prefetching:
|
||||
// +1% performance for Wilson on 32**4
|
||||
// -2% performance for DW on 24**4 x 12
|
||||
/*
|
||||
const int dist = 2;
|
||||
if (i+dist < num){
|
||||
svbool_t pg1 = svptrue_b64();
|
||||
|
||||
// prefetch input
|
||||
auto in = rhs_v(so+table_v[i+dist].second);
|
||||
svprfd(pg1, (char*)&in, SV_PLDL1STRM);
|
||||
|
||||
// prefetch store buffer
|
||||
uint64_t o = table_v[i+dist].first;
|
||||
svprfd(pg1, (char*)&buffer[off+o], SV_PSTL1STRM);
|
||||
}
|
||||
*/
|
||||
compressed_t tmp_c;
|
||||
uint64_t o = table_v[i].first;
|
||||
compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second));
|
||||
|
Loading…
x
Reference in New Issue
Block a user