mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	disable fcmla in vector type building for VLA
This commit is contained in:
		@@ -324,18 +324,6 @@ struct Sub{
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct Mult{
 | 
			
		||||
  // Three-operand overload: loads a, b and c under the predicate pg1,
  // combines them with svmad_x(pg1, b_v, c_v, a_v) and stores the result
  // into a local vec<T> that is returned by value.
  // NOTE(review): per ACLE, svmad computes op1*op2 + op3, so this is b*c + a — confirm callers expect that order.
  template <typename T>
 | 
			
		||||
  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
 | 
			
		||||
    vec<T> out;
 | 
			
		||||
    svbool_t pg1 = acle<T>::pg1();
 | 
			
		||||
    typename acle<T>::vt a_v = svld1(pg1, a.v);
 | 
			
		||||
    typename acle<T>::vt b_v = svld1(pg1, b.v);
 | 
			
		||||
    typename acle<T>::vt c_v = svld1(pg1, c.v);
 | 
			
		||||
    typename acle<T>::vt r_v = svmad_x(pg1, b_v, c_v, a_v);
 | 
			
		||||
    svst1(pg1, out.v, r_v);
 | 
			
		||||
 | 
			
		||||
    return out;
 | 
			
		||||
  }
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  inline vec<T> operator()(vec<T> a, vec<T> b){
 | 
			
		||||
    vec<T> out;
 | 
			
		||||
@@ -408,7 +396,7 @@ struct MultComplex{
 | 
			
		||||
struct MultAddComplex{
 | 
			
		||||
  // Complex a*b+c
 | 
			
		||||
  template <typename T>
 | 
			
		||||
  inline vec<T> operator()(vec<T> a, vec<T> b, vec<T> c){
 | 
			
		||||
  inline void mac(const vec<T> &a, const vec<T> b, const vec<T> c){
 | 
			
		||||
    vec<T> out;
 | 
			
		||||
    svbool_t pg1 = acle<T>::pg1();
 | 
			
		||||
    typename acle<T>::vt a_v = svld1(pg1, a.v);
 | 
			
		||||
@@ -419,9 +407,7 @@ struct MultAddComplex{
 | 
			
		||||
    typename acle<T>::vt r_v = svcmla_x(pg1, c_v, a_v, b_v, 0);
 | 
			
		||||
    r_v = svcmla_x(pg1, r_v, a_v, b_v, 90);
 | 
			
		||||
 | 
			
		||||
    svst1(pg1, out.v, r_v);
 | 
			
		||||
 | 
			
		||||
    return out;
 | 
			
		||||
    svst1(pg1, a.v, r_v);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -295,12 +295,12 @@ struct Sub{
 | 
			
		||||
 | 
			
		||||
struct Mult{
 | 
			
		||||
  // Real float fma
 | 
			
		||||
  inline void operator()(vecf a, vecf b, vecf c){
 | 
			
		||||
  // Three-operand float overload: returns svmad_x(pg1, b, c, a) evaluated
  // under the predicate acle<float>::pg1().
  // NOTE(review): per ACLE, svmad computes op1*op2 + op3, so the result is b*c + a — confirm callers pass operands in that order.
  inline vecf operator()(vecf a, vecf b, vecf c){
 | 
			
		||||
    pred pg1 = acle<float>::pg1();
 | 
			
		||||
    return svmad_x(pg1, b, c, a);
 | 
			
		||||
  }
 | 
			
		||||
  // Real double fma
 | 
			
		||||
  inline void operator()(vecd a, vecd b, vecd c){
 | 
			
		||||
  // Three-operand double overload: returns svmad_x(pg1, b, c, a) evaluated
  // under the predicate acle<double>::pg1().
  // NOTE(review): per ACLE, svmad computes op1*op2 + op3, so the result is b*c + a — confirm callers pass operands in that order.
  inline vecd operator()(vecd a, vecd b, vecd c){
 | 
			
		||||
    pred pg1 = acle<double>::pg1();
 | 
			
		||||
    return svmad_x(pg1, b, c, a);
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -298,23 +298,21 @@ public:
 | 
			
		||||
 | 
			
		||||
  // FIXME -- alias this to an accelerator_inline MAC struct.
 | 
			
		||||
 | 
			
		||||
  // A64FX: use FCMLA
 | 
			
		||||
  /*
 | 
			
		||||
  #if defined(A64FX) || defined(A64FXFIXEDSIZE) // A64FX: use FCMLA
 | 
			
		||||
  // FIXME VLA build error
 | 
			
		||||
  //#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error
 | 
			
		||||
  #if defined(A64FXFIXEDSIZE)
 | 
			
		||||
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
 | 
			
		||||
				     const Grid_simd *__restrict__ a,
 | 
			
		||||
				     const Grid_simd *__restrict__ x) {
 | 
			
		||||
    y->v = Optimization::MultAddComplex::mac(a->v, x->v, y->v);
 | 
			
		||||
    *y = fxmac((*a), (*x), (*y));
 | 
			
		||||
  };
 | 
			
		||||
  #else
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  */
 | 
			
		||||
  // Multiply-accumulate written with the Grid_simd operators:
  // *y = (*a) * (*x) + (*y). y supplies the addend and receives the result.
  // NOTE(review): the enclosing #if/#else lines are interleaved in this view — confirm which build configuration selects this version.
  friend accelerator_inline void mac(Grid_simd *__restrict__ y,
 | 
			
		||||
				     const Grid_simd *__restrict__ a,
 | 
			
		||||
				     const Grid_simd *__restrict__ x) {
 | 
			
		||||
    *y = (*a) * (*x) + (*y);
 | 
			
		||||
  };
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  friend accelerator_inline void mult(Grid_simd *__restrict__ y,
 | 
			
		||||
				      const Grid_simd *__restrict__ l,
 | 
			
		||||
@@ -793,6 +791,28 @@ accelerator_inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V>
 | 
			
		||||
  return ret;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// ----------------A64FX MAC ---------------------
 | 
			
		||||
// Distinguish between complex types and others
 | 
			
		||||
//#if defined(A64FX) || defined(A64FXFIXEDSIZE)  // VLA only: build error
 | 
			
		||||
#if defined(A64FXFIXEDSIZE)
 | 
			
		||||
// Overload selected through IfComplex<S>: passes the packed members a.v, b.v
// and c.v to trinary<V> together with a MultAddComplexSIMD functor, stores the
// result in ret.v and returns ret by value. All three operands are taken by value.
// NOTE(review): MultAddComplexSIMD is defined outside this view; presumably it
// aliases a complex multiply-add functor computing a*b+c — confirm.
template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
 | 
			
		||||
  Grid_simd<S, V> ret;
 | 
			
		||||
  ret.v = trinary<V>(a.v, b.v, c.v, MultAddComplexSIMD());
 | 
			
		||||
  return ret;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Real/Integer types
 | 
			
		||||
// Overload selected through IfNotComplex<S>: passes the packed members a.v, b.v
// and c.v to trinary<V> together with a MultSIMD functor, stores the result in
// ret.v and returns ret by value. All three operands are taken by value.
// NOTE(review): MultSIMD is defined outside this view. If it aliases a Mult
// struct whose three-operand call returns svmad_x(pg1, b, c, a), this yields
// b*c + a rather than a*b + c — confirm against what mac() expects.
template <class S, class V, IfNotComplex<S> = 0>
 | 
			
		||||
accelerator_inline Grid_simd<S, V> fxmac(Grid_simd<S, V> a, Grid_simd<S, V> b, Grid_simd<S, V> c) {
 | 
			
		||||
  Grid_simd<S, V> ret;
 | 
			
		||||
  ret.v = trinary<V>(a.v, b.v, c.v, MultSIMD());
 | 
			
		||||
  return ret;
 | 
			
		||||
};
 | 
			
		||||
#endif
 | 
			
		||||
// -------------------------------------
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Distinguish between complex types and others
 | 
			
		||||
template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
accelerator_inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user