1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-25 13:15:55 +01:00

Divide handling improved

This commit is contained in:
paboyle 2016-09-26 09:42:22 +01:00
parent b6713ecb60
commit 836e929565
5 changed files with 110 additions and 36 deletions

View File

@ -365,6 +365,18 @@ namespace Optimization {
} }
}; };
struct Div{
// Real float
inline __m256 operator()(__m256 a, __m256 b){
return _mm256_div_ps(a,b);
}
// Real double
inline __m256d operator()(__m256d a, __m256d b){
return _mm256_div_pd(a,b);
}
};
struct Conj{ struct Conj{
// Complex single // Complex single
inline __m256 operator()(__m256 in){ inline __m256 operator()(__m256 in){
@ -473,7 +485,7 @@ namespace Optimization {
} }
#endif #endif
/*
inline std::ostream & operator << (std::ostream& stream, const __m256 a) inline std::ostream & operator << (std::ostream& stream, const __m256 a)
{ {
const float *p=(const float *)&a; const float *p=(const float *)&a;
@ -486,7 +498,7 @@ namespace Optimization {
stream<< "{"<<p[0]<<","<<p[1]<<","<<p[2]<<","<<p[3]<<"}"; stream<< "{"<<p[0]<<","<<p[1]<<","<<p[2]<<","<<p[3]<<"}";
return stream; return stream;
}; };
*/
struct Rotate{ struct Rotate{
static inline __m256 rotate(__m256 in,int n){ static inline __m256 rotate(__m256 in,int n){
@ -631,6 +643,7 @@ namespace Optimization {
// Arithmetic operations // Arithmetic operations
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
typedef Optimization::Div DivSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;

View File

@ -230,6 +230,17 @@ namespace Optimization {
} }
}; };
struct Div{
// Real float
inline __m512 operator()(__m512 a, __m512 b){
return _mm512_div_ps(a,b);
}
// Real double
inline __m512d operator()(__m512d a, __m512d b){
return _mm512_div_pd(a,b);
}
};
struct Conj{ struct Conj{
// Complex single // Complex single
@ -427,6 +438,7 @@ namespace Optimization {
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::Div DivSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesMinusI TimesMinusISIMD;

View File

@ -244,6 +244,17 @@ namespace Optimization {
} }
}; };
struct Div{
// Real float
inline __m512 operator()(__m512 a, __m512 b){
return _mm512_div_ps(a,b);
}
// Real double
inline __m512d operator()(__m512d a, __m512d b){
return _mm512_div_pd(a,b);
}
};
struct Conj{ struct Conj{
// Complex single // Complex single
@ -437,6 +448,7 @@ namespace Optimization {
// Arithmetic operations // Arithmetic operations
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
typedef Optimization::Div DivSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;

View File

@ -224,6 +224,18 @@ namespace Optimization {
} }
}; };
struct Div{
// Real float
inline __m128 operator()(__m128 a, __m128 b){
return _mm_div_ps(a,b);
}
// Real double
inline __m128d operator()(__m128d a, __m128d b){
return _mm_div_pd(a,b);
}
};
struct Conj{ struct Conj{
// Complex single // Complex single
inline __m128 operator()(__m128 in){ inline __m128 operator()(__m128 in){
@ -372,6 +384,8 @@ namespace Optimization {
} }
} }
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
@ -398,6 +412,7 @@ namespace Optimization {
// Arithmetic operations // Arithmetic operations
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
typedef Optimization::Div DivSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD; typedef Optimization::Conj ConjSIMD;

View File

@ -77,38 +77,24 @@ struct RealPart<std::complex<T> > {
////////////////////////////////////// //////////////////////////////////////
// demote a vector to real type // demote a vector to real type
////////////////////////////////////// //////////////////////////////////////
// type alias used to simplify the syntax of std::enable_if // type alias used to simplify the syntax of std::enable_if
template <typename T> template <typename T> using Invoke = typename T::type;
using Invoke = typename T::type; template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
template <typename Condition, typename ReturnType> template <typename Condition, typename ReturnType> using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType> >;
template <typename Condition, typename ReturnType>
using NotEnableIf = Invoke<std::enable_if<!Condition::value, ReturnType> >;
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Check for complexity with type traits // Check for complexity with type traits
template <typename T> template <typename T> struct is_complex : public std::false_type {};
struct is_complex : public std::false_type {}; template <> struct is_complex<std::complex<double> > : public std::true_type {};
template <> template <> struct is_complex<std::complex<float> > : public std::true_type {};
struct is_complex<std::complex<double> > : public std::true_type {};
template <>
struct is_complex<std::complex<float> > : public std::true_type {};
template <typename T> template <typename T> using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >;
using IfReal = Invoke<std::enable_if<std::is_floating_point<T>::value, int> >; template <typename T> using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
template <typename T> template <typename T> using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
using IfComplex = Invoke<std::enable_if<is_complex<T>::value, int> >;
template <typename T>
using IfInteger = Invoke<std::enable_if<std::is_integral<T>::value, int> >;
template <typename T> template <typename T> using IfNotReal = Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >;
using IfNotReal = template <typename T> using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
Invoke<std::enable_if<!std::is_floating_point<T>::value, int> >; template <typename T> using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
template <typename T>
using IfNotComplex = Invoke<std::enable_if<!is_complex<T>::value, int> >;
template <typename T>
using IfNotInteger = Invoke<std::enable_if<!std::is_integral<T>::value, int> >;
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Define the operation templates functors // Define the operation templates functors
@ -285,6 +271,20 @@ class Grid_simd {
return a * b; return a * b;
} }
//////////////////////////////////
// Divides
//////////////////////////////////
friend inline Grid_simd operator/(const Scalar_type &a, Grid_simd b) {
Grid_simd va;
vsplat(va, a);
return va / b;
}
friend inline Grid_simd operator/(Grid_simd b, const Scalar_type &a) {
Grid_simd va;
vsplat(va, a);
return b / a;
}
/////////////////////// ///////////////////////
// Unary negation // Unary negation
/////////////////////// ///////////////////////
@ -428,7 +428,6 @@ inline void rotate(Grid_simd<S,V> &ret,Grid_simd<S,V> b,int nrot)
ret.v = Optimization::Rotate::rotate(b.v,2*nrot); ret.v = Optimization::Rotate::rotate(b.v,2*nrot);
} }
template <class S, class V> template <class S, class V>
inline void vbroadcast(Grid_simd<S,V> &ret,const Grid_simd<S,V> &src,int lane){ inline void vbroadcast(Grid_simd<S,V> &ret,const Grid_simd<S,V> &src,int lane){
S* typepun =(S*) &src; S* typepun =(S*) &src;
@ -512,7 +511,6 @@ template <class S, class V, IfInteger<S> = 0>
inline void vfalse(Grid_simd<S, V> &ret) { inline void vfalse(Grid_simd<S, V> &ret) {
vsplat(ret, 0); vsplat(ret, 0);
} }
template <class S, class V> template <class S, class V>
inline void zeroit(Grid_simd<S, V> &z) { inline void zeroit(Grid_simd<S, V> &z) {
vzero(z); vzero(z);
@ -530,7 +528,6 @@ inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
typedef typename S::value_type T; typedef typename S::value_type T;
binary<void>((T *)&out.v, in.v, VstreamSIMD()); binary<void>((T *)&out.v, in.v, VstreamSIMD());
} }
template <class S, class V, IfInteger<S> = 0> template <class S, class V, IfInteger<S> = 0>
inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) { inline void vstream(Grid_simd<S, V> &out, const Grid_simd<S, V> &in) {
out = in; out = in;
@ -569,6 +566,36 @@ inline Grid_simd<S, V> operator*(Grid_simd<S, V> a, Grid_simd<S, V> b) {
return ret; return ret;
}; };
// Distinguish between complex types and others
template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
typedef Grid_simd<S, V> simd;
simd ret;
simd den;
typename simd::conv_t conv;
ret = a * conjugate(b) ;
den = b * conjugate(b) ;
auto real_den = toReal(den);
ret.v=binary<V>(ret.v, real_den.v, DivSIMD());
std::cout << " Div call from complex "<<a<<" / " << b<< " = " <<ret<< std::endl;
return ret;
};
// Real/Integer types
template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> operator/(Grid_simd<S, V> a, Grid_simd<S, V> b) {
Grid_simd<S, V> ret;
ret.v = binary<V>(a.v, b.v, DivSIMD());
return ret;
};
/////////////////////// ///////////////////////
// Conjugate // Conjugate
/////////////////////// ///////////////////////
@ -582,7 +609,6 @@ template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) { inline Grid_simd<S, V> conjugate(const Grid_simd<S, V> &in) {
return in; // for real objects return in; // for real objects
} }
// Suppress adj for integer types... // odd; why conjugate above but not adj?? // Suppress adj for integer types... // odd; why conjugate above but not adj??
template <class S, class V, IfNotInteger<S> = 0> template <class S, class V, IfNotInteger<S> = 0>
inline Grid_simd<S, V> adj(const Grid_simd<S, V> &in) { inline Grid_simd<S, V> adj(const Grid_simd<S, V> &in) {
@ -596,14 +622,12 @@ template <class S, class V, IfComplex<S> = 0>
inline void timesMinusI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) { inline void timesMinusI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD()); ret.v = binary<V>(in.v, ret.v, TimesMinusISIMD());
} }
template <class S, class V, IfComplex<S> = 0> template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) { inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
Grid_simd<S, V> ret; Grid_simd<S, V> ret;
timesMinusI(ret, in); timesMinusI(ret, in);
return ret; return ret;
} }
template <class S, class V, IfNotComplex<S> = 0> template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) { inline Grid_simd<S, V> timesMinusI(const Grid_simd<S, V> &in) {
return in; return in;
@ -616,14 +640,12 @@ template <class S, class V, IfComplex<S> = 0>
inline void timesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) { inline void timesI(Grid_simd<S, V> &ret, const Grid_simd<S, V> &in) {
ret.v = binary<V>(in.v, ret.v, TimesISIMD()); ret.v = binary<V>(in.v, ret.v, TimesISIMD());
} }
template <class S, class V, IfComplex<S> = 0> template <class S, class V, IfComplex<S> = 0>
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) { inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
Grid_simd<S, V> ret; Grid_simd<S, V> ret;
timesI(ret, in); timesI(ret, in);
return ret; return ret;
} }
template <class S, class V, IfNotComplex<S> = 0> template <class S, class V, IfNotComplex<S> = 0>
inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) { inline Grid_simd<S, V> timesI(const Grid_simd<S, V> &in) {
return in; return in;