1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-18 07:47:06 +01:00

round-up after testing of prefetches in stencil close

This commit is contained in:
nmeyer-ur
2020-06-03 11:58:20 +02:00
parent 5050833b42
commit 5ee3ea2144
4 changed files with 41 additions and 10 deletions

View File

@ -68,8 +68,27 @@ void Gather_plane_simple_table (Vector<std::pair<int,int> >& table,const Lattice
int num=table.size();
std::pair<int,int> *table_v = & table[0];
auto rhs_v = rhs.View();
// main loop
accelerator_forNB( i,num, vobj::Nsimd(), {
typedef decltype(coalescedRead(buffer[0])) compressed_t;
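// software prefetching (disabled; code kept in the comment block below for reference)
// measured effect when enabled:
//   +1% performance for Wilson on 32**4
//   -2% performance for DW on 24**4 x 12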
/*
const int dist = 2;
if (i+dist < num){
svbool_t pg1 = svptrue_b64();
// prefetch input
auto in = rhs_v(so+table_v[i+dist].second);
svprfd(pg1, (char*)&in, SV_PLDL1STRM);
// prefetch store buffer
uint64_t o = table_v[i+dist].first;
svprfd(pg1, (char*)&buffer[off+o], SV_PSTL1STRM);
}
*/
compressed_t tmp_c;
uint64_t o = table_v[i].first;
compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second));