From 81484a47604e1041c437501c265e9fe02bf2e2e7 Mon Sep 17 00:00:00 2001 From: nmeyer-ur Date: Wed, 20 May 2020 22:36:45 +0200 Subject: [PATCH] symmetrize Mult and MultAddComplex --- Grid/simd/Grid_a64fx-2.h | 6 ++---- Grid/simd/Grid_a64fx-fixedsize.h | 8 ++++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Grid/simd/Grid_a64fx-2.h b/Grid/simd/Grid_a64fx-2.h index cc25faa3..d92f8b40 100644 --- a/Grid/simd/Grid_a64fx-2.h +++ b/Grid/simd/Grid_a64fx-2.h @@ -400,7 +400,7 @@ struct MultComplex{ struct MultAddComplex{ // Complex a*b+c template - inline vec mac(const vec &a, const vec &b, const vec &c){ + inline vec mac(const vec &a, const vec b, const vec c){ vec out; svbool_t pg1 = acle::pg1(); @@ -412,9 +412,7 @@ struct MultAddComplex{ typename acle::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0); r_v = svcmla_x(pg1, r_v, a_v, b_v, 90); - svst1(pg1, out.v, r_v); - - return out; + svst1(pg1, a.v, r_v); } }; diff --git a/Grid/simd/Grid_a64fx-fixedsize.h b/Grid/simd/Grid_a64fx-fixedsize.h index 3a0066d1..9e0fc51e 100644 --- a/Grid/simd/Grid_a64fx-fixedsize.h +++ b/Grid/simd/Grid_a64fx-fixedsize.h @@ -369,18 +369,18 @@ struct MultComplex{ struct MultAddComplex{ // Complex a*b+c // Complex float - inline vecf mac(vecf a, vecf b, vecf c){ + inline vecf mac(vecf &a, vecf b, vecf c){ pred pg1 = acle::pg1(); // using FCMLA vecf r_v = svcmla_x(pg1, c, a, b, 0); - return svcmla_x(pg1, r_v, a, b, 90); + a = svcmla_x(pg1, r_v, a, b, 90); } // Complex double - inline vecd mac(vecd a, vecd b, vecd c){ + inline vecd mac(vecd &a, vecd b, vecd c){ pred pg1 = acle::pg1(); // using FCMLA vecd r_v = svcmla_x(pg1, c, a, b, 0); - return svcmla_x(pg1, r_v, a, b, 90); + a = svcmla_x(pg1, r_v, a, b, 90); } };