mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 10:09:34 +01:00 
			
		
		
		
	Disable FCMLA in vector type building for VLA
This commit is contained in:
		| @@ -324,18 +324,6 @@ struct Sub{ | |||||||
| }; | }; | ||||||
|  |  | ||||||
| struct Mult{ | struct Mult{ | ||||||
|   template <typename T> |  | ||||||
|   inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){ |  | ||||||
|     vec<T> out; |  | ||||||
|     svbool_t pg1 = acle<T>::pg1(); |  | ||||||
|     typename acle<T>::vt a_v = svld1(pg1, a.v); |  | ||||||
|     typename acle<T>::vt b_v = svld1(pg1, b.v); |  | ||||||
|     typename acle<T>::vt c_v = svld1(pg1, c.v); |  | ||||||
|     typename acle<T>::vt r_v = svmad_x(pg1, b_v, c_v, a_v); |  | ||||||
|     svst1(pg1, out.v, r_v); |  | ||||||
|  |  | ||||||
|     return out; |  | ||||||
|   } |  | ||||||
|   template <typename T> |   template <typename T> | ||||||
|   inline vec<T> operator()(vec<T> a, vec<T> b){ |   inline vec<T> operator()(vec<T> a, vec<T> b){ | ||||||
|     vec<T> out; |     vec<T> out; | ||||||
| @@ -408,7 +396,7 @@ struct MultComplex{ | |||||||
| struct MultAddComplex{ | struct MultAddComplex{ | ||||||
|   // Complex a*b+c |   // Complex a*b+c | ||||||
|   template <typename T> |   template <typename T> | ||||||
|   inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){ |   inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){ | ||||||
|     vec<T> out; |     vec<T> out; | ||||||
|     svbool_t pg1 = acle<T>::pg1(); |     svbool_t pg1 = acle<T>::pg1(); | ||||||
|     typename acle<T>::vt a_v = svld1(pg1, a.v); |     typename acle<T>::vt a_v = svld1(pg1, a.v); | ||||||
| @@ -419,9 +407,7 @@ struct MultAddComplex{ | |||||||
|     typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0); |     typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0); | ||||||
|     r_v = svcmla_x(pg1, r_v, a_v, b_v, 90); |     r_v = svcmla_x(pg1, r_v, a_v, b_v, 90); | ||||||
|  |  | ||||||
|     svst1(pg1, out.v, r_v); |     svst1(pg1, a.v, r_v); | ||||||
|  |  | ||||||
|     return out; |  | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -295,12 +295,12 @@ struct Sub{ | |||||||
|  |  | ||||||
| struct Mult{ | struct Mult{ | ||||||
|   // Real float fma |   // Real float fma | ||||||
|   inline void operator()(vecf a, vecf b, vecf c){ |   inline vecf operator()(vecf a, vecf b, vecf c){ | ||||||
|     pred pg1 = acle<float>::pg1(); |     pred pg1 = acle<float>::pg1(); | ||||||
|     return svmad_x(pg1, b, c, a); |     return svmad_x(pg1, b, c, a); | ||||||
|   } |   } | ||||||
|   // Real double fma |   // Real double fma | ||||||
|   inline void operator()(vecd a, vecd b, vecd c){ |   inline vecd operator()(vecd a, vecd b, vecd c){ | ||||||
|     pred pg1 = acle<double>::pg1(); |     pred pg1 = acle<double>::pg1(); | ||||||
|     return svmad_x(pg1, b, c, a); |     return svmad_x(pg1, b, c, a); | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -298,23 +298,21 @@ public: | |||||||
|  |  | ||||||
|   // FIXME -- alias this to an accelerator_inline MAC struct. |   // FIXME -- alias this to an accelerator_inline MAC struct. | ||||||
|  |  | ||||||
|   // A64FX: use FCMLA |   // FIXME VLA build error | ||||||
|   /* |   //#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error | ||||||
|   #if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA |   #if defined(A64FXFIXEDSIZE) | ||||||
|   friend accelerator_inline void mac(Grid_simd *__restrict__ y, |   friend accelerator_inline void mac(Grid_simd *__restrict__ y, | ||||||
| 				     const Grid_simd *__restrict__ a, | 				     const Grid_simd *__restrict__ a, | ||||||
| 				     const Grid_simd *__restrict__ x) { | 				     const Grid_simd *__restrict__ x) { | ||||||
|     y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v); |     *y = fxmac((*a), (*x), (*y)); | ||||||
|   }; |   }; | ||||||
|   #else |   #else | ||||||
|   #endif |  | ||||||
|  |  | ||||||
|   */ |  | ||||||
|   friend accelerator_inline void mac(Grid_simd *__restrict__ y, |   friend accelerator_inline void mac(Grid_simd *__restrict__ y, | ||||||
| 				     const Grid_simd *__restrict__ a, | 				     const Grid_simd *__restrict__ a, | ||||||
| 				     const Grid_simd *__restrict__ x) { | 				     const Grid_simd *__restrict__ x) { | ||||||
|     *y = (*a) * (*x) + (*y); |     *y = (*a) * (*x) + (*y); | ||||||
|   }; |   }; | ||||||
|  |   #endif | ||||||
|  |  | ||||||
|   friend accelerator_inline void mult(Grid_simd *__restrict__ y, |   friend accelerator_inline void mult(Grid_simd *__restrict__ y, | ||||||
| 				      const Grid_simd *__restrict__ l, | 				      const Grid_simd *__restrict__ l, | ||||||
| @@ -793,6 +791,28 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> | |||||||
|   return ret; |   return ret; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | // ----------------A64FX MAC --------------------- | ||||||
|  | // Distinguish between complex types and others | ||||||
|  | //#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error | ||||||
|  | #if defined(A64FXFIXEDSIZE) | ||||||
|  | template <class S, class V, IfComplex<S> = 0> | ||||||
|  | accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) { | ||||||
|  |   Grid_simd<S, V> ret; | ||||||
|  |   ret.v = trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD()); | ||||||
|  |   return ret; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // Real/Integer types | ||||||
|  | template <class S, class V, IfNotComplex<S> = 0> | ||||||
|  | accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) { | ||||||
|  |   Grid_simd<S, V> ret; | ||||||
|  |   ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD()); | ||||||
|  |   return ret; | ||||||
|  | }; | ||||||
|  | #endif | ||||||
|  | // ------------------------------------- | ||||||
|  |  | ||||||
|  |  | ||||||
| // Distinguish between complex types and others | // Distinguish between complex types and others | ||||||
| template <class S, class V, IfComplex<S> = 0> | template <class S, class V, IfComplex<S> = 0> | ||||||
| accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) { | accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user