mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 15:57:05 +01:00
Testing GCC 10.0.1: build errors occur in Exchange1 when using -DA64FX, and in Lattice_base.h when building Dslash only
This commit is contained in:
@ -38,8 +38,6 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
#define LOCK_GAUGE(A)
|
||||
#define UNLOCK_GAUGE(A)
|
||||
#define MASK_REGS DECLARATIONS_A64FXd
|
||||
#define COMPLEX_SIGNS(A)
|
||||
#define LOAD64(A,B)
|
||||
#define SAVE_RESULT(A,B) RESULT_A64FXd(A); PREFETCH_RESULT_L2_STORE(B)
|
||||
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXd(Dir)
|
||||
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXd
|
||||
|
@ -38,8 +38,6 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
#define LOCK_GAUGE(A)
|
||||
#define UNLOCK_GAUGE(A)
|
||||
#define MASK_REGS DECLARATIONS_A64FXf
|
||||
#define COMPLEX_SIGNS(A)
|
||||
#define LOAD64(A,B)
|
||||
#define SAVE_RESULT(A,B) RESULT_A64FXf(A); PREFETCH_RESULT_L2_STORE(B)
|
||||
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXf(Dir)
|
||||
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXf
|
||||
|
@ -38,8 +38,6 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
#define LOCK_GAUGE(A)
|
||||
#define UNLOCK_GAUGE(A)
|
||||
#define MASK_REGS DECLARATIONS_A64FXd
|
||||
#define COMPLEX_SIGNS(A)
|
||||
#define LOAD64(A,B)
|
||||
#define SAVE_RESULT(A,B) RESULT_A64FXd(A); PREFETCH_RESULT_L2_STORE(B)
|
||||
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXd(Dir)
|
||||
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXd
|
||||
@ -111,7 +109,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
pg1 = svptrue_b64(); \
|
||||
svuint64_t table0; \
|
||||
svfloat64_t zero0; \
|
||||
zero0 = __svzero(zero0);
|
||||
zero0 = svdup_f64(0.);
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
@ -559,18 +557,18 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
|
||||
// ZERO_PSI
|
||||
#define ZERO_PSI_A64FXd \
|
||||
result_00 = __svzero(result_00); \
|
||||
result_01 = __svzero(result_01); \
|
||||
result_02 = __svzero(result_02); \
|
||||
result_10 = __svzero(result_10); \
|
||||
result_11 = __svzero(result_11); \
|
||||
result_12 = __svzero(result_12); \
|
||||
result_20 = __svzero(result_20); \
|
||||
result_21 = __svzero(result_21); \
|
||||
result_22 = __svzero(result_22); \
|
||||
result_30 = __svzero(result_30); \
|
||||
result_31 = __svzero(result_31); \
|
||||
result_32 = __svzero(result_32);
|
||||
result_00 = svdup_f64(0.); \
|
||||
result_01 = svdup_f64(0.); \
|
||||
result_02 = svdup_f64(0.); \
|
||||
result_10 = svdup_f64(0.); \
|
||||
result_11 = svdup_f64(0.); \
|
||||
result_12 = svdup_f64(0.); \
|
||||
result_20 = svdup_f64(0.); \
|
||||
result_21 = svdup_f64(0.); \
|
||||
result_22 = svdup_f64(0.); \
|
||||
result_30 = svdup_f64(0.); \
|
||||
result_31 = svdup_f64(0.); \
|
||||
result_32 = svdup_f64(0.);
|
||||
|
||||
// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
|
||||
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXd(base) \
|
||||
|
@ -38,8 +38,6 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
#define LOCK_GAUGE(A)
|
||||
#define UNLOCK_GAUGE(A)
|
||||
#define MASK_REGS DECLARATIONS_A64FXf
|
||||
#define COMPLEX_SIGNS(A)
|
||||
#define LOAD64(A,B)
|
||||
#define SAVE_RESULT(A,B) RESULT_A64FXf(A); PREFETCH_RESULT_L2_STORE(B)
|
||||
#define MULT_2SPIN_1(Dir) MULT_2SPIN_1_A64FXf(Dir)
|
||||
#define MULT_2SPIN_2 MULT_2SPIN_2_A64FXf
|
||||
@ -111,7 +109,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
pg1 = svptrue_b32(); \
|
||||
svuint32_t table0; \
|
||||
svfloat32_t zero0; \
|
||||
zero0 = __svzero(zero0);
|
||||
zero0 = svdup_f32(0.);
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
@ -559,18 +557,18 @@ Author: Nils Meyer <nils.meyer@ur.de>
|
||||
|
||||
// ZERO_PSI
|
||||
#define ZERO_PSI_A64FXf \
|
||||
result_00 = __svzero(result_00); \
|
||||
result_01 = __svzero(result_01); \
|
||||
result_02 = __svzero(result_02); \
|
||||
result_10 = __svzero(result_10); \
|
||||
result_11 = __svzero(result_11); \
|
||||
result_12 = __svzero(result_12); \
|
||||
result_20 = __svzero(result_20); \
|
||||
result_21 = __svzero(result_21); \
|
||||
result_22 = __svzero(result_22); \
|
||||
result_30 = __svzero(result_30); \
|
||||
result_31 = __svzero(result_31); \
|
||||
result_32 = __svzero(result_32);
|
||||
result_00 = svdup_f32(0.); \
|
||||
result_01 = svdup_f32(0.); \
|
||||
result_02 = svdup_f32(0.); \
|
||||
result_10 = svdup_f32(0.); \
|
||||
result_11 = svdup_f32(0.); \
|
||||
result_12 = svdup_f32(0.); \
|
||||
result_20 = svdup_f32(0.); \
|
||||
result_21 = svdup_f32(0.); \
|
||||
result_22 = svdup_f32(0.); \
|
||||
result_30 = svdup_f32(0.); \
|
||||
result_31 = svdup_f32(0.); \
|
||||
result_32 = svdup_f32(0.);
|
||||
|
||||
// PREFETCH_RESULT_L2_STORE (prefetch store to L2)
|
||||
#define PREFETCH_RESULT_L2_STORE_INTERNAL_A64FXf(base) \
|
||||
|
@ -385,7 +385,7 @@ struct MultComplex{
|
||||
svbool_t pg1 = acle<T>::pg1();
|
||||
typename acle<T>::vt a_v = svld1(pg1, a.v);
|
||||
typename acle<T>::vt b_v = svld1(pg1, b.v);
|
||||
typename acle<T>::vt z_v = __svzero(z_v);
|
||||
typename acle<T>::vt z_v = acle<T>::zero();
|
||||
|
||||
// using FCMLA
|
||||
typename acle<T>::vt r_v = svcmla_x(pg1, z_v, a_v, b_v, 90);
|
||||
|
@ -118,7 +118,10 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) {
|
||||
#ifdef GEN
|
||||
#if defined(A64FX) // breakout A64FX SVE ACLE here
|
||||
//#pragma message("building for A64FX / SVE ACLE")
|
||||
#define ARMCLANGHOTFIX
|
||||
// Enable the armclang workaround only when building with a clang-based
// compiler: armclang 20.0 compiles, but binaries give wrong results without
// the hotfix.
// Clang-based compilers predefine __clang__, not the bare identifier `clang`;
// testing `defined(clang)` is never true unless the build passes -Dclang, so
// the hotfix would silently stay disabled.
#if defined(__clang__)
#define ARMCLANGHOTFIX
#endif
|
||||
#include <arm_sve.h>
|
||||
#include "Grid_a64fx-2.h"
|
||||
#else
|
||||
#include "Grid_generic.h"
|
||||
|
Reference in New Issue
Block a user