disable fcmla in vector type building for VLA

2026-07-08 03:13:29 +01:00 · 2020-05-21 19:41:42 +02:00
parent 046b1cbbc0
commit 8c5a5fdfce
3 changed files with 31 additions and 25 deletions
@@ -324,18 +324,6 @@ struct Sub{
 };

 struct Mult{
-  template <typename T>
-  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
-    vec<T> out;
-    svbool_t pg1 = acle<T>::pg1();
-    typename acle<T>::vt a_v = svld1(pg1, a.v);
-    typename acle<T>::vt b_v = svld1(pg1, b.v);
-    typename acle<T>::vt c_v = svld1(pg1, c.v);
-    typename acle<T>::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
-    svst1(pg1, out.v, r_v);
-
-    return out;
-  }
  template <typename T>
  inline vec<T> operator()(vec<T> a, vec<T> b){
    vec<T> out;
@@ -408,7 +396,7 @@ struct MultComplex{
 struct MultAddComplex{
  // Complex a*b+c
  template <typename T>
-  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
+  inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
    vec<T> out;
    svbool_t pg1 = acle<T>::pg1();
    typename acle<T>::vt a_v = svld1(pg1, a.v);
@@ -419,9 +407,7 @@ struct MultAddComplex{
    typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
    r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);

-    svst1(pg1, out.v, r_v);
-
-    return out;
+    svst1(pg1, a.v, r_v);
  }
 };

@@ -295,12 +295,12 @@ struct Sub{

 struct Mult{
  // Real float fma: returns svmad_x(pg1, b, c, a) under the predicate from acle<float>::pg1().
  // NOTE(review): per the ACLE definition of svmad this should evaluate b*c + a, i.e. the
  // first argument is the addend -- confirm callers pass their operands in that order.
-  inline void operator()(vecf a, vecf b, vecf c){
+  inline vecf operator()(vecf a, vecf b, vecf c){
    pred pg1 = acle<float>::pg1();
    return svmad_x(pg1, b, c, a);
  }
  // Real double fma: returns svmad_x(pg1, b, c, a) under the predicate from acle<double>::pg1().
  // NOTE(review): per the ACLE definition of svmad this should evaluate b*c + a, i.e. the
  // first argument is the addend -- confirm callers pass their operands in that order.
-  inline void operator()(vecd a, vecd b, vecd c){
+  inline vecd operator()(vecd a, vecd b, vecd c){
    pred pg1 = acle<double>::pg1();
    return svmad_x(pg1, b, c, a);
  }
@@ -298,23 +298,21 @@ public:

  // FIXME -- alias this to an accelerator_inline MAC struct.

-  // A64FX: use FCMLA
-  /*
-  #if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA
+  // FIXME VLA build error
+  //#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error
+  #if defined(A64FXFIXEDSIZE)
  // Multiply-accumulate used when A64FXFIXEDSIZE is defined: overwrites *y with
  // fxmac(*a, *x, *y). The generic branch computes (*a) * (*x) + (*y).
  // NOTE(review): confirm fxmac yields that same a*x + y for non-complex element types too.
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
 				     const Grid_simd *__restrict__ a,
 				     const Grid_simd *__restrict__ x) {
-    y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v);
+    *y = fxmac((*a), (*x), (*y));
  };
  #else
-  #endif
-
-  */
  // Generic multiply-accumulate, compiled when A64FXFIXEDSIZE is not defined:
  // overwrites *y with (*a) * (*x) + (*y) using Grid_simd's overloaded * and + operators.
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
 				     const Grid_simd *__restrict__ a,
 				     const Grid_simd *__restrict__ x) {
    *y = (*a) * (*x) + (*y);
  };
+  #endif

  friend accelerator_inline void mult(Grid_simd *__restrict__ y,
 				      const Grid_simd *__restrict__ l,
@@ -793,6 +791,28 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
  return ret;
 };

+// ----------------A64FX MAC ---------------------
+// Distinguish between complex types and others
+//#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error
+#if defined(A64FXFIXEDSIZE)
+template <class S, class V, IfComplex<S> = 0>
+accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
+  // Complex overload: hand the three raw SIMD payloads to the MultAddComplexSIMD functor
+  // through trinary<V> and wrap the result back into a Grid_simd.
+  Grid_simd<S, V> result;
+  result.v = trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD());
+  return result;
+};
+
+// Real/Integer types: fxmac(a, b, c) is meant to yield a*b + c, like the generic
+// mac expression (*a) * (*x) + (*y) that it replaces on A64FXFIXEDSIZE builds.
+// The three-operand real Mult functor evaluates svmad_x(pg, b, c, a), which by the
+// ACLE definition is (second * third) + first, so the addend c must be passed first.
+// NOTE(review): assumes MultSIMD aliases that Mult functor -- confirm.
+template <class S, class V, IfNotComplex<S> = 0>
+accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
+  Grid_simd<S, V> ret;
+  // Operand order (c, a, b) makes the functor compute a*b + c rather than b*c + a.
+  ret.v = trinary<V>(c.v, a.v, b.v, MultSIMD());
+  return ret;
+};
+#endif
+// -------------------------------------
+
+
 // Distinguish between complex types and others
 template <class S, class V, IfComplex<S> = 0>
 accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {