mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Divide handling improved
This commit is contained in:
		@@ -365,6 +365,18 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Div{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline __m256 operator()(__m256 a, __m256 b){
 | 
			
		||||
      return _mm256_div_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline __m256d operator()(__m256d a, __m256d b){
 | 
			
		||||
      return _mm256_div_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  struct Conj{
 | 
			
		||||
    // Complex single
 | 
			
		||||
    inline __m256 operator()(__m256 in){
 | 
			
		||||
@@ -473,7 +485,7 @@ namespace Optimization {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
    inline std::ostream & operator << (std::ostream& stream, const __m256 a)
 | 
			
		||||
    {
 | 
			
		||||
      const float *p=(const float *)&a;
 | 
			
		||||
@@ -486,7 +498,7 @@ namespace Optimization {
 | 
			
		||||
      stream<< "{"<<p[0]<<","<<p[1]<<","<<p[2]<<","<<p[3]<<"}";
 | 
			
		||||
      return stream;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
  */
 | 
			
		||||
  struct Rotate{
 | 
			
		||||
 | 
			
		||||
    static inline __m256 rotate(__m256 in,int n){ 
 | 
			
		||||
@@ -631,6 +643,7 @@ namespace Optimization {
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
  typedef Optimization::Sub         SubSIMD;
 | 
			
		||||
  typedef Optimization::Div         DivSIMD;
 | 
			
		||||
  typedef Optimization::Mult        MultSIMD;
 | 
			
		||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
			
		||||
  typedef Optimization::Conj        ConjSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -230,6 +230,17 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Div{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline __m512 operator()(__m512 a, __m512 b){
 | 
			
		||||
      return _mm512_div_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline __m512d operator()(__m512d a, __m512d b){
 | 
			
		||||
      return _mm512_div_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  struct Conj{
 | 
			
		||||
    // Complex single
 | 
			
		||||
@@ -427,6 +438,7 @@ namespace Optimization {
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
  typedef Optimization::Sub         SubSIMD;
 | 
			
		||||
  typedef Optimization::Mult        MultSIMD;
 | 
			
		||||
  typedef Optimization::Div         DivSIMD;
 | 
			
		||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
			
		||||
  typedef Optimization::Conj        ConjSIMD;
 | 
			
		||||
  typedef Optimization::TimesMinusI TimesMinusISIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -244,6 +244,17 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Div{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline __m512 operator()(__m512 a, __m512 b){
 | 
			
		||||
      return _mm512_div_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline __m512d operator()(__m512d a, __m512d b){
 | 
			
		||||
      return _mm512_div_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  struct Conj{
 | 
			
		||||
    // Complex single
 | 
			
		||||
@@ -437,6 +448,7 @@ namespace Optimization {
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
  typedef Optimization::Sub         SubSIMD;
 | 
			
		||||
  typedef Optimization::Div         DivSIMD;
 | 
			
		||||
  typedef Optimization::Mult        MultSIMD;
 | 
			
		||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
			
		||||
  typedef Optimization::Conj        ConjSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -224,6 +224,18 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Div{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline __m128 operator()(__m128 a, __m128 b){
 | 
			
		||||
      return _mm_div_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline __m128d operator()(__m128d a, __m128d b){
 | 
			
		||||
      return _mm_div_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  struct Conj{
 | 
			
		||||
    // Complex single
 | 
			
		||||
    inline __m128 operator()(__m128 in){
 | 
			
		||||
@@ -372,6 +384,8 @@ namespace Optimization {
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Here assign types 
 | 
			
		||||
 | 
			
		||||
@@ -398,6 +412,7 @@ namespace Optimization {
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
  typedef Optimization::Sub         SubSIMD;
 | 
			
		||||
  typedef Optimization::Div         DivSIMD;
 | 
			
		||||
  typedef Optimization::Mult        MultSIMD;
 | 
			
		||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
			
		||||
  typedef Optimization::Conj        ConjSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -77,38 +77,24 @@ struct RealPart<std::complex<T> > {
 | 
			
		||||
//////////////////////////////////////
 | 
			
		||||
// demote a vector to real type
 | 
			
		||||
//////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// type alias used to simplify the syntax of std::enable_if
 | 
			
		||||
template <typename T>
 | 
			
		||||
using Invoke = typename T::type;
 | 
			
		||||
template <typename Condition, typename ReturnType>
 | 
			
		||||
using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
 | 
			
		||||
template <typename Condition, typename ReturnType>
 | 
			
		||||
using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
 | 
			
		||||
template <typename T> using Invoke = typename T::type;
 | 
			
		||||
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
 | 
			
		||||
template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////
 | 
			
		||||
// Check for complexity with type traits
 | 
			
		||||
template <typename T>
 | 
			
		||||
struct is_complex : public std::false_type {};
 | 
			
		||||
template <>
 | 
			
		||||
struct is_complex<std::complex<double> > : public std::true_type {};
 | 
			
		||||
template <>
 | 
			
		||||
struct is_complex<std::complex<float> > : public std::true_type {};
 | 
			
		||||
template <typename T> struct is_complex : public std::false_type {};
 | 
			
		||||
template <> struct is_complex<std::complex<double> > : public std::true_type {};
 | 
			
		||||
template <> struct is_complex<std::complex<float> > : public std::true_type {};
 | 
			
		||||
 | 
			
		||||
template <typename T>
 | 
			
		||||
using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
 | 
			
		||||
template <typename T>
 | 
			
		||||
using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
 | 
			
		||||
template <typename T>
 | 
			
		||||
using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
 | 
			
		||||
template <typename T> using IfReal       = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
 | 
			
		||||
template <typename T> using IfComplex    = Invoke<std::enable_if<is_complex<T>::value, int> >;
 | 
			
		||||
template <typename T> using IfInteger    = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
 | 
			
		||||
 | 
			
		||||
template <typename T>
 | 
			
		||||
using IfNotReal =
 | 
			
		||||
    Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
 | 
			
		||||
template <typename T>
 | 
			
		||||
using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
 | 
			
		||||
template <typename T>
 | 
			
		||||
using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
 | 
			
		||||
template <typename T> using IfNotReal    = Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
 | 
			
		||||
template <typename T> using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
 | 
			
		||||
template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////
 | 
			
		||||
// Define the operation templates functors
 | 
			
		||||
@@ -285,6 +271,20 @@ class Grid_simd {
 | 
			
		||||
    return a * b;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////
 | 
			
		||||
  // Divides
 | 
			
		||||
  //////////////////////////////////
 | 
			
		||||
  friend inline Grid_simd operator/(const Scalar_type &a, Grid_simd b) {
 | 
			
		||||
    Grid_simd va;
 | 
			
		||||
    vsplat(va, a);
 | 
			
		||||
    return va / b;
 | 
			
		||||
  }
 | 
			
		||||
  friend inline Grid_simd operator/(Grid_simd b, const Scalar_type &a) {
 | 
			
		||||
    Grid_simd va;
 | 
			
		||||
    vsplat(va, a);
 | 
			
		||||
    return b / a;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ///////////////////////
 | 
			
		||||
  // Unary negation
 | 
			
		||||
  ///////////////////////
 | 
			
		||||
@@ -428,7 +428,6 @@ inline void rotate(Grid_simd<S,V> &ret,Grid_simd<S,V> b,int nrot)
 | 
			
		||||
  ret.v = Optimization::Rotate::rotate(b.v,2*nrot);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class S, class V> 
 | 
			
		||||
inline void vbroadcast(Grid_simd<S,V> &ret,const Grid_simd<S,V> &src,int lane){
 | 
			
		||||
  S* typepun =(S*) &src;
 | 
			
		||||
@@ -512,7 +511,6 @@ template <class S, class V, IfInteger<S> = 0>
 | 
			
		||||
inline void vfalse(Grid_simd<S, V> &ret) {
 | 
			
		||||
  vsplat(ret, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V>
 | 
			
		||||
inline void zeroit(Grid_simd<S, V> &z) {
 | 
			
		||||
  vzero(z);
 | 
			
		||||
@@ -530,7 +528,6 @@ inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
 | 
			
		||||
  typedef typename S::value_type T;
 | 
			
		||||
  binary<void>((T *)&out.v, in.v, VstreamSIMD());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V, IfInteger<S> = 0>
 | 
			
		||||
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
 | 
			
		||||
  out = in;
 | 
			
		||||
@@ -569,6 +566,36 @@ inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
 | 
			
		||||
  return ret;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Distinguish between complex types and others
 | 
			
		||||
template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
 | 
			
		||||
  typedef Grid_simd<S, V> simd;
 | 
			
		||||
 | 
			
		||||
  simd ret;
 | 
			
		||||
  simd den;
 | 
			
		||||
  typename simd::conv_t conv;
 | 
			
		||||
 | 
			
		||||
  ret = a * conjugate(b) ;
 | 
			
		||||
  den = b * conjugate(b) ;
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  auto real_den = toReal(den);
 | 
			
		||||
 | 
			
		||||
  ret.v=binary<V>(ret.v, real_den.v, DivSIMD());
 | 
			
		||||
 | 
			
		||||
  std::cout << " Div call from complex "<<a<<" / " << b<< " = " <<ret<< std::endl;
 | 
			
		||||
 | 
			
		||||
  return ret;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Real/Integer types
 | 
			
		||||
template <class S, class V, IfNotComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
 | 
			
		||||
  Grid_simd<S, V> ret;
 | 
			
		||||
  ret.v = binary<V>(a.v, b.v, DivSIMD());
 | 
			
		||||
  return ret;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
///////////////////////
 | 
			
		||||
// Conjugate
 | 
			
		||||
///////////////////////
 | 
			
		||||
@@ -582,7 +609,6 @@ template <class S, class V, IfNotComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
 | 
			
		||||
  return in;  // for real objects
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Suppress adj for integer types... // odd; why conjugate above but not adj??
 | 
			
		||||
template <class S, class V, IfNotInteger<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> adj(const Grid_simd<S, V> &in) {
 | 
			
		||||
@@ -596,14 +622,12 @@ template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
inline void timesMinusI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
 | 
			
		||||
  ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
 | 
			
		||||
  Grid_simd<S, V> ret;
 | 
			
		||||
  timesMinusI(ret, in);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V, IfNotComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
 | 
			
		||||
  return in;
 | 
			
		||||
@@ -616,14 +640,12 @@ template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
inline void timesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
 | 
			
		||||
  ret.v = binary<V>(in.v, ret.v, TimesISIMD());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V, IfComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
 | 
			
		||||
  Grid_simd<S, V> ret;
 | 
			
		||||
  timesI(ret, in);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class S, class V, IfNotComplex<S> = 0>
 | 
			
		||||
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
 | 
			
		||||
  return in;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user