mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 15:27:06 +01:00
Now with prefetching (pf); best results so far come from using intrinsics together with prefetching.
This commit is contained in:
@ -26,20 +26,20 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#define LOAD_CHIMU_A64FXf(x) LOAD_CHIMU_INTERLEAVED_A64FXf(x)
|
||||
#define PREFETCH_CHIMU_L1(A)
|
||||
#define PREFETCH_GAUGE_L1(A)
|
||||
#define PREFETCH_CHIMU_L2(A)
|
||||
#define PREFETCH_GAUGE_L2(A)
|
||||
#define PREFETCH_CHIMU_L1(A) PREFETCH_CHIMU_L1_INTERNAL_A64FXf(A)
|
||||
#define PREFETCH_GAUGE_L1(A) PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A)
|
||||
#define PREFETCH_CHIMU_L2(A) PREFETCH_CHIMU_L2_INTERNAL_A64FXf(A)
|
||||
#define PREFETCH_GAUGE_L2(A) PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A)
|
||||
#define PF_GAUGE(A)
|
||||
#define PREFETCH1_CHIMU(A)
|
||||
#define PREFETCH_CHIMU(A)
|
||||
#define PREFETCH1_CHIMU(A) PREFETCH_CHIMU_L1(A)
|
||||
#define PREFETCH_CHIMU(A) PREFETCH_CHIMU_L1(A)
|
||||
#define LOCK_GAUGE(A)
|
||||
#define UNLOCK_GAUGE(A)
|
||||
#define MASK_REGS DECLARATIONS_A64FXf
|
||||
#define COMPLEX_SIGNS(A)
|
||||
#define LOAD64(A,B)
|
||||
#define SAVE_RESULT(A,B) RESULT_A64FXf(A)
|
||||
#define MULT_2SPIN_DIR_PF(A,B) MULT_2SPIN_A64FXf(A)
|
||||
#define MULT_2SPIN_DIR_PF(A,B) PREFETCH_GAUGE_L1(A); PREFETCH_CHIMU_L2(B); MULT_2SPIN_A64FXf(A); if ((A == 0) || (A == 4)) { PREFETCH_GAUGE_L2(A); }
|
||||
#define MAYBEPERM(A,perm) { A ; }
|
||||
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
|
||||
#define ZERO_PSI
|
||||
@ -154,15 +154,21 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
// PREFETCH_GAUGE_L2 (prefetch to L2)
|
||||
#define PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A) \
|
||||
{ \
|
||||
const auto & ref(U[sUn][A]); uint64_t baseU = (uint64_t)&ref[0][0]; \
|
||||
const auto & ref(U[sUn](A)); uint64_t baseU = (uint64_t)&ref + 3 * 3 * 64; \
|
||||
svprfd(pg1, (int64_t*)(baseU + -256), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 768), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 1024), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 1280), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 1536), SV_PLDL2STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 1792), SV_PLDL2STRM); \
|
||||
}
|
||||
// PREFETCH_GAUGE_L1 (prefetch to L1)
|
||||
#define PREFETCH_GAUGE_L1_INTERNAL(A)_A64FXf \
|
||||
#define PREFETCH_GAUGE_L1_INTERNAL_A64FXf(A) \
|
||||
{ \
|
||||
const auto & ref(U[sU][A]); uint64_t baseU = (uint64_t)&ref[0][0]; \
|
||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
||||
svprfd(pg1, (int64_t*)(baseU + 0), SV_PLDL1STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 256), SV_PLDL1STRM); \
|
||||
svprfd(pg1, (int64_t*)(baseU + 512), SV_PLDL1STRM); \
|
||||
|
Reference in New Issue
Block a user