diff --git a/Grid/simd/Grid_a64fx-2.h b/Grid/simd/Grid_a64fx-2.h index 0333299f..a0463a10 100644 --- a/Grid/simd/Grid_a64fx-2.h +++ b/Grid/simd/Grid_a64fx-2.h @@ -396,7 +396,7 @@ struct MultComplex{ struct MultAddComplex{ // Complex a*b+c template - inline void mac(const vec &a, const vec b, const vec c){ + inline vec operator()(vec a, vec b, vec c){ vec out; svbool_t pg1 = acle::pg1(); typename acle::vt a_v = svld1(pg1, a.v); @@ -406,8 +406,9 @@ struct MultAddComplex{ // using FCMLA typename acle::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0); r_v = svcmla_x(pg1, r_v, a_v, b_v, 90); + svst1(pg1, out.v, r_v); - svst1(pg1, a.v, r_v); + return out; } }; diff --git a/Grid/simd/Grid_vector_types.h b/Grid/simd/Grid_vector_types.h index f8de3d30..e1eb330d 100644 --- a/Grid/simd/Grid_vector_types.h +++ b/Grid/simd/Grid_vector_types.h @@ -299,8 +299,7 @@ public: // FIXME -- alias this to an accelerator_inline MAC struct. // FIXME VLA build error - //#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error - #if defined(A64FXFIXEDSIZE) + #if defined(A64FX) || defined(A64FXFIXEDSIZE) friend accelerator_inline void mac(Grid_simd *__restrict__ y, const Grid_simd *__restrict__ a, const Grid_simd *__restrict__ x) { @@ -791,10 +790,9 @@ accelerator_inline Grid_simd operator*(Grid_simd a, Grid_simd return ret; }; -// ----------------A64FX MAC --------------------- +// ---------------- A64FX MAC ------------------- // Distinguish between complex types and others -//#if defined(A64FX) || defined(A64FXFIXEDSIZE) // VLA only: build error -#if defined(A64FXFIXEDSIZE) +#if defined(A64FX) || defined(A64FXFIXEDSIZE) template = 0> accelerator_inline Grid_simd fxmac(Grid_simd a, Grid_simd b, Grid_simd c) { Grid_simd ret; @@ -810,7 +808,7 @@ accelerator_inline Grid_simd fxmac(Grid_simd a, Grid_simd b, G return ret; }; #endif -// ------------------------------------- +// ---------------------------------------------- // Distinguish between complex types and others