mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 15:27:06 +01:00
980 GiB/s Wilson; 680 GiB/s DW (DP)
This commit is contained in:
@ -31,6 +31,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
// Prefetch macro aliases.
// The three *_L2 names forward their argument to the matching *_INTERNAL_A64FXf macro.
// PREFETCH1_CHIMU and PREFETCH_CHIMU both forward to PREFETCH_CHIMU_L1.
// PF_GAUGE and LOCK_GAUGE have an empty replacement list, so they expand to nothing.
#define PREFETCH_CHIMU_L2(A) PREFETCH_CHIMU_L2_INTERNAL_A64FXf(A)
|
||||
#define PREFETCH_GAUGE_L2(A) PREFETCH_GAUGE_L2_INTERNAL_A64FXf(A)
|
||||
#define PF_GAUGE(A)
|
||||
#define PREFETCH_RESULT_L2_STORE(A) PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXf(A)
|
||||
#define PREFETCH1_CHIMU(A) PREFETCH_CHIMU_L1(A)
|
||||
#define PREFETCH_CHIMU(A) PREFETCH_CHIMU_L1(A)
|
||||
#define LOCK_GAUGE(A)
|
||||
@ -38,8 +39,11 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
// MASK_REGS expands to DECLARATIONS_A64FXf.
// COMPLEX_SIGNS and LOAD64 have an empty replacement list, so they expand to nothing.
#define MASK_REGS DECLARATIONS_A64FXf
|
||||
#define COMPLEX_SIGNS(A)
|
||||
#define LOAD64(A,B)
|
||||
// NOTE(review): SAVE_RESULT and MULT_2SPIN_DIR_PF each appear twice in this listing.
// The hunk header (-38,8 +39,11) is consistent with the two definitions directly below
// being the removed lines and the later ones being their replacements - confirm against the file.
// This form of SAVE_RESULT expands to RESULT_A64FXf(A) only; B is unused.
#define SAVE_RESULT(A,B) RESULT_A64FXf(A)
|
||||
// This one-line form runs PREFETCH_GAUGE_L1(A) and PREFETCH_CHIMU_L2(B) around MULT_2SPIN_A64FXf(A),
// then PREFETCH_GAUGE_L2(A) when A is 0 or 4. It expands to several statements without an enclosing block.
#define MULT_2SPIN_DIR_PF(A,B) PREFETCH_GAUGE_L1(A); PREFETCH_CHIMU_L2(B); MULT_2SPIN_A64FXf(A); if ((A == 0) || (A == 4)) { PREFETCH_GAUGE_L2(A); }
|
||||
// This form of SAVE_RESULT additionally runs PREFETCH_CHIMU_L1(B) after RESULT_A64FXf(A).
// The replacement list already ends in ';', so a call written with its own ';' yields an extra empty statement.
#define SAVE_RESULT(A,B) RESULT_A64FXf(A); PREFETCH_CHIMU_L1(B);
|
||||
// MULT_2SPIN_DIR_PF(A,B)
// Runs MULT_2SPIN_A64FXf(A), then PREFETCH_CHIMU_L2(B), and finally
// PREFETCH_GAUGE_L2(A) only when s == 0 and A is 0 or 4.
// `s` is not a macro parameter; it is resolved in the scope where the macro
// is expanded.
// The whole expansion is wrapped in a single block so that it behaves as one
// statement: under an unbraced `if` all three steps are governed by the
// condition, and the trailing `if` cannot capture a following `else`.
// A is parenthesized in the comparisons so that expression arguments compare
// as a whole.
#define MULT_2SPIN_DIR_PF(A,B) \
{ \
    MULT_2SPIN_A64FXf(A); \
    PREFETCH_CHIMU_L2(B); \
    if (s == 0) { if (((A) == 0) || ((A) == 4)) { PREFETCH_GAUGE_L2(A); } } \
}
|
||||
// MAYBEPERM(A,perm) expands to a block containing A; the perm argument is not used.
// LOAD_CHI forwards its argument to LOAD_CHI_A64FXf.
// ZERO_PSI has an empty replacement list, so it expands to nothing.
#define MAYBEPERM(A,perm) { A ; }
|
||||
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
|
||||
#define ZERO_PSI
|
||||
@ -285,6 +289,17 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
Chi_11 = svtbl(Chi_11, table0); \
|
||||
Chi_12 = svtbl(Chi_12, table0);
|
||||
|
||||
// LOAD_GAUGE
// Loads six vectors (U_00, U_10, U_20, U_01, U_11, U_21) with svld1 under predicate pg1.
// Each address is baseU plus a constant, where baseU is the address of U[sU](A) held as a uint64_t.
// The constants evaluate to: U_00 +0, U_01 +64, U_10 +192, U_11 +256, U_20 +384, U_21 +448.
// U, sU, pg1 and the U_xx destinations are not parameters; they come from the expansion site.
// NOTE(review): the macro takes no arguments, yet its body uses `A`. That `A` is resolved at the
// expansion site and is not substituted from an enclosing macro's parameter - confirm each use site.
// NOTE(review): `ref` and `baseU` are declared before the opening brace, so they are introduced into
// the enclosing scope; expanding this macro twice in one scope would redeclare them.
#define LOAD_GAUGE \
|
||||
const auto & ref(U[sU](A)); uint64_t baseU = (uint64_t)&ref; \
|
||||
{ \
|
||||
U_00 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -6 * 64)); \
|
||||
U_10 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -3 * 64)); \
|
||||
U_20 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 0 * 64)); \
|
||||
U_01 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -5 * 64)); \
|
||||
U_11 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + -2 * 64)); \
|
||||
U_21 = svld1(pg1, (float32_t*)(baseU + 2 * 3 * 64 + 1 * 64)); \
|
||||
}
|
||||
// MULT_2SPIN
|
||||
#define MULT_2SPIN_A64FXf(A) \
|
||||
{ \
|
||||
@ -580,6 +595,13 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
result_31 = __svzero(result_31); \
|
||||
result_32 = __svzero(result_32);
|
||||
|
||||
// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
// Issues three svprfd prefetches under predicate pg1 with SV_PSTL2STRM,
// at base + 0, base + 256 and base + 512.
// `base` is parenthesized so that an expression argument (for example a
// conditional) is evaluated as a whole before the offset is added.
// pg1 is not a parameter; it is resolved at the expansion site.
// NOTE(review): the offsets are bytes only if `base` is an integer address
// (callers appear to pass one) - confirm against the call sites.
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXf(base) \
{ \
    svprfd(pg1, (int64_t*)((base) + 0), SV_PSTL2STRM); \
    svprfd(pg1, (int64_t*)((base) + 256), SV_PSTL2STRM); \
    svprfd(pg1, (int64_t*)((base) + 512), SV_PSTL2STRM); \
}
|
||||
// ADD_RESULT_INTERNAL
|
||||
#define ADD_RESULT_INTERNAL_A64FXf \
|
||||
result_00 = svadd_x(pg1, result_00, Chimu_00); \
|
||||
|
Reference in New Issue
Block a user