From 046b1cbbc0b771f33979b124ba0a878d8f1e5cf7 Mon Sep 17 00:00:00 2001
From: nmeyer-ur
Date: Thu, 21 May 2020 19:39:07 +0200
Subject: [PATCH] enable fcmla in tensor arithmetics; fixed-size works, VLA does not compile

Add three-argument operator() overloads to Mult and MultAddComplex that
return their result by value, replacing the in-place mac() members where
those existed.

In Grid_a64fx-fixedsize.h the new Mult overloads are declared to return
vecf/vecd, like the MultAddComplex overloads: their bodies return the
svmad_x result, which a function declared void is not allowed to do.

---
 Grid/simd/Grid_a64fx-2.h         | 18 ++++++++++++++++--
 Grid/simd/Grid_a64fx-fixedsize.h | 16 ++++++++--------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/Grid/simd/Grid_a64fx-2.h b/Grid/simd/Grid_a64fx-2.h
index 0333299f..65254e90 100644
--- a/Grid/simd/Grid_a64fx-2.h
+++ b/Grid/simd/Grid_a64fx-2.h
@@ -324,6 +324,18 @@ struct Sub{
 };
 
 struct Mult{
+  template
+  inline vec operator()(vec a, vec b, vec c){
+    vec out;
+    svbool_t pg1 = acle::pg1();
+    typename acle::vt a_v = svld1(pg1, a.v);
+    typename acle::vt b_v = svld1(pg1, b.v);
+    typename acle::vt c_v = svld1(pg1, c.v);
+    typename acle::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
+    svst1(pg1, out.v, r_v);
+
+    return out;
+  }
   template
   inline vec operator()(vec a, vec b){
     vec out;
@@ -396,7 +408,7 @@ struct MultComplex{
 struct MultAddComplex{
   // Complex a*b+c
   template
-  inline void mac(const vec &a, const vec b, const vec c){
+  inline vec operator()(vec a, vec b, vec c){
     vec out;
     svbool_t pg1 = acle::pg1();
     typename acle::vt a_v = svld1(pg1, a.v);
@@ -407,7 +419,9 @@ struct MultAddComplex{
     typename acle::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
     r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
 
-    svst1(pg1, a.v, r_v);
+    svst1(pg1, out.v, r_v);
+
+    return out;
   }
 };
 
diff --git a/Grid/simd/Grid_a64fx-fixedsize.h b/Grid/simd/Grid_a64fx-fixedsize.h
index a07cbff3..b3b93884 100644
--- a/Grid/simd/Grid_a64fx-fixedsize.h
+++ b/Grid/simd/Grid_a64fx-fixedsize.h
@@ -295,14 +295,14 @@ struct Sub{
 
 struct Mult{
   // Real float fma
-  inline void mac(vecf &a, vecf b, vecf c){
+  inline vecf operator()(vecf a, vecf b, vecf c){
     pred pg1 = acle::pg1();
-    a = svmad_x(pg1, b, c, a);
+    return svmad_x(pg1, b, c, a);
   }
   // Real double fma
-  inline void mac(vecd &a, vecd b, vecd c){
+  inline vecd operator()(vecd a, vecd b, vecd c){
     pred pg1 = acle::pg1();
-    a = svmad_x(pg1, b, c, a);
+    return svmad_x(pg1, b, c, a);
   }
   // Real float
   inline vecf operator()(vecf a, vecf b){
@@ -376,18 +376,18 @@ struct MultComplex{
 struct MultAddComplex{
   // Complex a*b+c
   // Complex float
-  inline void mac(vecf &a, vecf b, vecf c){
+  inline vecf operator()(vecf a, vecf b, vecf c){
     pred pg1 = acle::pg1();
     // using FCMLA
     vecf r_v = svcmla_x(pg1, c, a, b, 0);
-    a = svcmla_x(pg1, r_v, a, b, 90);
+    return svcmla_x(pg1, r_v, a, b, 90);
   }
   // Complex double
-  inline void mac(vecd &a, vecd b, vecd c){
+  inline vecd operator()(vecd a, vecd b, vecd c){
     pred pg1 = acle::pg1();
     // using FCMLA
     vecd r_v = svcmla_x(pg1, c, a, b, 0);
-    a = svcmla_x(pg1, r_v, a, b, 90);
+    return svcmla_x(pg1, r_v, a, b, 90);
   }
 };
 