From 8c5a5fdfceb69ce5caabbfa3e5f32b7b8bbbd4d6 Mon Sep 17 00:00:00 2001
From: nmeyer-ur
Date: Thu, 21 May 2020 19:41:42 +0200
Subject: [PATCH] disable fcmla in vector type building for VLA

---
Review notes (placed below the "---" marker, so not part of the commit message):

* Grid_a64fx-2.h: the three-operand Mult::operator() is removed, and
  MultAddComplex::operator() is replaced by `void mac(...)`, which stores the
  FCMLA result into `a.v` instead of returning a new vector.
  NOTE(review): `a` is declared `const vec &a` but is the destination of the
  svst1 store, and `vec out;` is left behind with no remaining use in the
  visible hunks -- confirm whether this is the "VLA build error" mentioned in
  Grid_vector_types.h.
* Grid_a64fx-fixedsize.h: both three-operand Mult::operator() overloads were
  declared `void` while returning the svmad_x result; they now return
  vecf / vecd.
* Grid_vector_types.h: the previously commented-out specialised mac() is
  enabled for A64FXFIXEDSIZE only and forwards to the new fxmac() helpers;
  every other build keeps `*y = (*a) * (*x) + (*y)`.
  NOTE(review): mac() calls fxmac(a, x, y). For real/integer types this goes
  through Mult(a, b, c), which evaluates svmad_x(pg1, b, c, a) -- presumably
  b*c + a, i.e. x*y + a rather than a*x + y. Confirm the operand order
  against the ACLE definition of svmad.
* NOTE(review): several lines look as if angle-bracket content was lost
  (`template = 0>`, bare `template`, `acle::pg1()`, the From: address);
  verify against the upstream commit before applying.

 Grid/simd/Grid_a64fx-2.h         | 18 ++---------------
 Grid/simd/Grid_a64fx-fixedsize.h |  4 ++--
 Grid/simd/Grid_vector_types.h    | 34 +++++++++++++++++++++++++-------
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/Grid/simd/Grid_a64fx-2.h b/Grid/simd/Grid_a64fx-2.h
index 65254e90..0333299f 100644
--- a/Grid/simd/Grid_a64fx-2.h
+++ b/Grid/simd/Grid_a64fx-2.h
@@ -324,18 +324,6 @@ struct Sub{
 };
 
 struct Mult{
-  template
-  inline vec operator()(vec a, vec b, vec c){
-    vec out;
-    svbool_t pg1 = acle::pg1();
-    typename acle::vt a_v = svld1(pg1, a.v);
-    typename acle::vt b_v = svld1(pg1, b.v);
-    typename acle::vt c_v = svld1(pg1, c.v);
-    typename acle::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
-    svst1(pg1, out.v, r_v);
-
-    return out;
-  }
   template
   inline vec operator()(vec a, vec b){
     vec out;
@@ -408,7 +396,7 @@ struct MultComplex{
 struct MultAddComplex{
   // Complex a*b+c
   template
-  inline vec operator()(vec a, vec b, vec c){
+  inline void mac(const vec &a, const vec b, const vec c){
     vec out;
     svbool_t pg1 = acle::pg1();
     typename acle::vt a_v = svld1(pg1, a.v);
@@ -419,9 +407,7 @@ struct MultAddComplex{
     typename acle::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
     r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
 
-    svst1(pg1, out.v, r_v);
-
-    return out;
+    svst1(pg1, a.v, r_v);
   }
 };
 
diff --git a/Grid/simd/Grid_a64fx-fixedsize.h b/Grid/simd/Grid_a64fx-fixedsize.h
index b3b93884..2a6533fe 100644
--- a/Grid/simd/Grid_a64fx-fixedsize.h
+++ b/Grid/simd/Grid_a64fx-fixedsize.h
@@ -295,12 +295,12 @@ struct Sub{
 
 struct Mult{
   // Real float fma
-  inline void operator()(vecf a, vecf b, vecf c){
+  inline vecf operator()(vecf a, vecf b, vecf c){
     pred pg1 = acle::pg1();
     return svmad_x(pg1, b, c, a);
   }
   // Real double fma
-  inline void operator()(vecd a, vecd b, vecd c){
+  inline vecd operator()(vecd a, vecd b, vecd c){
     pred pg1 = acle::pg1();
     return svmad_x(pg1, b, c, a);
   }
diff --git a/Grid/simd/Grid_vector_types.h b/Grid/simd/Grid_vector_types.h
index e2624e15..f8de3d30 100644
--- a/Grid/simd/Grid_vector_types.h
+++ b/Grid/simd/Grid_vector_types.h
@@ -298,23 +298,21 @@ public:
 
   // FIXME -- alias this to an accelerator_inline MAC struct.
 
-  // A64FX: use FCMLA
-  /*
-  #if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA
+  // FIXME VLA build error
+  //#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error
+  #if defined(A64FXFIXEDSIZE)
   friend accelerator_inline void mac(Grid_simd *__restrict__ y,
                                      const Grid_simd *__restrict__ a,
                                      const Grid_simd *__restrict__ x) {
-    y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v);
+    *y = fxmac((*a), (*x), (*y));
   };
   #else
-  #endif
-
-  */
   friend accelerator_inline void mac(Grid_simd *__restrict__ y,
                                      const Grid_simd *__restrict__ a,
                                      const Grid_simd *__restrict__ x) {
     *y = (*a) * (*x) + (*y);
   };
+  #endif
 
   friend accelerator_inline void mult(Grid_simd *__restrict__ y,
                                       const Grid_simd *__restrict__ l,
@@ -793,6 +791,28 @@ accelerator_inline Grid_simd operator*(Grid_simd a, Grid_simd
   return ret;
 };
 
+// ----------------A64FX MAC ---------------------
+// Distinguish between complex types and others
+//#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error
+#if defined(A64FXFIXEDSIZE)
+template = 0>
+accelerator_inline Grid_simd fxmac(Grid_simd a, Grid_simd b, Grid_simd c) {
+  Grid_simd ret;
+  ret.v = trinary(a.v, b.v, c.v, MultAddComplexSIMD());
+  return ret;
+};
+
+// Real/Integer types
+template = 0>
+accelerator_inline Grid_simd fxmac(Grid_simd a, Grid_simd b, Grid_simd c) {
+  Grid_simd ret;
+  ret.v = trinary(a.v, b.v, c.v, MultSIMD());
+  return ret;
+};
+#endif
+// -------------------------------------
+
+
 // Distinguish between complex types and others
 template = 0>
 accelerator_inline Grid_simd operator/(Grid_simd a, Grid_simd b) {