disable fcmla in vector type building for VLA

2025-12-16 18:54:40 +00:00 · 2020-05-21 19:41:42 +02:00
parent 046b1cbbc0
commit 8c5a5fdfce
3 changed files with 31 additions and 25 deletions
--- a/Grid/simd/Grid_a64fx-2.h
+++ b/Grid/simd/Grid_a64fx-2.h
@@ -324,18 +324,6 @@ struct Sub{
 };
 struct Mult{
  // Three-operand multiply-add: returns b*c + a, element by element.
  // The operands are loaded from the vec<T> buffers under the predicate
  // supplied by acle<T>::pg1() (defined elsewhere), combined with svmad_x --
  // which multiplies its first two vector operands and adds the third --
  // and stored into a fresh vec<T>.
  template <typename T>
  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
    using sve_t = typename acle<T>::vt;
    svbool_t pg = acle<T>::pg1();
    sve_t addend = svld1(pg, a.v);
    sve_t lhs    = svld1(pg, b.v);
    sve_t rhs    = svld1(pg, c.v);
    // lhs * rhs + addend
    sve_t fused  = svmad_x(pg, lhs, rhs, addend);
    vec<T> result;
    svst1(pg, result.v, fused);
    return result;
  }
  template <typename T>
  inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> out;
@@ -408,7 +396,7 @@ struct MultComplex{
 struct MultAddComplex{
  // Complex multiply-add a*b+c: accumulates onto c with two svcmla_x (FCMLA) steps, rotations 0 and 90
  template <typename T>
-  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
+  inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
    vec<T> out;
    svbool_t pg1 = acle<T>::pg1();
    typename acle<T>::vt a_v = svld1(pg1, a.v);
@@ -419,9 +407,7 @@ struct MultAddComplex{
    typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
    r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
-    svst1(pg1, out.v, r_v);
+    svst1(pg1, a.v, r_v);
    return out;
  }
 };
--- a/Grid/simd/Grid_a64fx-fixedsize.h
+++ b/Grid/simd/Grid_a64fx-fixedsize.h
@@ -295,12 +295,12 @@ struct Sub{
 struct Mult{
  // Real float fma: returns b*c + a (svmad_x multiplies its first two operands and adds the third)
-  inline void operator()(vecf a, vecf b, vecf c){
+  inline vecf operator()(vecf a, vecf b, vecf c){
    pred pg1 = acle<float>::pg1();
    return svmad_x(pg1, b, c, a);
  }
  // Real double fma: returns b*c + a (svmad_x multiplies its first two operands and adds the third)
-  inline void operator()(vecd a, vecd b, vecd c){
+  inline vecd operator()(vecd a, vecd b, vecd c){
    pred pg1 = acle<double>::pg1();
    return svmad_x(pg1, b, c, a);
  }
--- a/Grid/simd/Grid_vector_types.h
+++ b/Grid/simd/Grid_vector_types.h
@@ -298,23 +298,21 @@ public:
  // FIXME -- alias this to an accelerator_inline MAC struct.
-  // A64FX: use FCMLA
+  // FIXME: the fxmac-based mac does not build for VLA (A64FX); it is enabled for A64FXFIXEDSIZE only
-  /*
+  //#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error
-  #if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA
+  #if defined(A64FXFIXEDSIZE)
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
 				     const Grid_simd *__restrict__ a,
 				     const Grid_simd *__restrict__ x) {
-    y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v);
+    *y = fxmac((*a), (*x), (*y));
  };
  #else
  #endif
  */
  // Generic multiply-accumulate: *y = (*a) * (*x) + (*y), built from the
  // Grid_simd operator* and operator+.
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
                                     const Grid_simd *__restrict__ a,
                                     const Grid_simd *__restrict__ x) {
    const Grid_simd &lhs = *a;
    const Grid_simd &rhs = *x;
    *y = lhs * rhs + (*y);
  };
  #endif
  friend accelerator_inline void mult(Grid_simd *__restrict__ y,
 				      const Grid_simd *__restrict__ l,
@@ -793,6 +791,28 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
  return ret;
 };
 // ----------------A64FX MAC ---------------------
 // Distinguish between complex types and others
 //#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error
 #if defined(A64FXFIXEDSIZE)
 // Complex overload (selected via IfComplex<S>): forwards the three raw
 // vectors, in the order a, b, c, to trinary<V> with a MultAddComplexSIMD
 // functor and wraps the result in a Grid_simd.
 // NOTE(review): presumably evaluates a*b + c, matching the
 // "Complex a*b+c" functor in the A64FX headers -- confirm against the
 // MultAddComplexSIMD definition.
 template <class S, class V, IfComplex<S> = 0>
 accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a,
                                          Grid_simd<S, V> b,
                                          Grid_simd<S, V> c) {
  Grid_simd<S, V> result;
  result.v = trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD());
  return result;
 };
 // Real/Integer types
 // Real/integer overload (selected via IfNotComplex<S>): forwards the three
 // raw vectors, in the order a, b, c, to trinary<V> with a MultSIMD functor
 // and wraps the result in a Grid_simd.
 // NOTE(review): the three-operand Mult functors in the A64FX headers call
 // svmad_x(pg1, b, c, a), i.e. b*c + a. If MultSIMD resolves to one of them
 // and trinary passes operands through in order, fxmac(a, x, y) yields
 // x*y + a rather than a*x + y -- confirm the intended operand order.
 template <class S, class V, IfNotComplex<S> = 0>
 accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a,
                                          Grid_simd<S, V> b,
                                          Grid_simd<S, V> c) {
  Grid_simd<S, V> result;
  result.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
  return result;
 };
 #endif
 // -------------------------------------
 // Distinguish between complex types and others
 template <class S, class V, IfComplex<S> = 0>
 accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {