mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
vnum variant for prefetches
This commit is contained in:
parent
4dd9e39e0d
commit
909acd55cd
@ -144,38 +144,38 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
||||||
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXd(base) \
|
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
||||||
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXd(base) \
|
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXd(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
||||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXd(A) \
|
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXd(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sUn](A)); baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
const auto & ref(U[sUn](A)); baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + -256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(-4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 768), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(12), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1024), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(16), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1280), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(20), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1536), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(24), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1792), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(28), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
||||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXd(A) \
|
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXd(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// LOAD_CHI
|
// LOAD_CHI
|
||||||
#define LOAD_CHI_A64FXd(base) \
|
#define LOAD_CHI_A64FXd(base) \
|
||||||
|
@ -144,38 +144,38 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
|||||||
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
// PREFETCH_CHIMU_L2 (prefetch to L2)
|
||||||
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXf(base) \
|
#define PREFETCH_CHIMU_L2_INTERNAL_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
// PREFETCH_CHIMU_L1 (prefetch to L1)
|
||||||
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXf(base) \
|
#define PREFETCH_CHIMU_L1_INTERNAL_A64FXf(base) \
|
||||||
{ \
|
{ \
|
||||||
svprfd(pg1, (int64_t*)(base + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(base + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(base), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
||||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A) \
|
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sUn](A)); baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
const auto & ref(U[sUn](A)); baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + -256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(-4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 768), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(12), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1024), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(16), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1280), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(20), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1536), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(24), SV_PLDL2STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 1792), SV_PLDL2STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(28), SV_PLDL2STRM); \
|
||||||
}
|
}
|
||||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
||||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A) \
|
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A) \
|
||||||
{ \
|
{ \
|
||||||
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
const auto & ref(U[sU](A)); baseU = (uint64_t)&ref; \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(0), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(4), SV_PLDL1STRM); \
|
||||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL1STRM); \
|
svprfd_vnum(pg1, (void*)(baseU), (int64_t)(8), SV_PLDL1STRM); \
|
||||||
}
|
}
|
||||||
// LOAD_CHI
|
// LOAD_CHI
|
||||||
#define LOAD_CHI_A64FXf(base) \
|
#define LOAD_CHI_A64FXf(base) \
|
||||||
|
Loading…
Reference in New Issue
Block a user