mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Corrected bug in integer multiplications for SSE4 and AVX2
Merge remote-tracking branch 'upstream/master' Conflicts: tests/Make.inc
This commit is contained in:
@ -4,7 +4,7 @@
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-06-09 14:26:59 neo>
|
||||
// Time-stamp: <2015-06-16 23:30:41 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
@ -248,7 +248,7 @@ namespace Optimization {
|
||||
return _mm256_set_m128i(a1,a0);
|
||||
#endif
|
||||
#if defined (AVX2)
|
||||
return _mm256_mul_epi32(a,b);
|
||||
return _mm256_mullo_epi32(a,b);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
Using intrinsics
|
||||
*/
|
||||
// Time-stamp: <2015-06-09 14:24:01 neo>
|
||||
// Time-stamp: <2015-06-16 23:27:54 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <pmmintrin.h>
|
||||
@ -97,7 +97,7 @@ namespace Optimization {
|
||||
}
|
||||
// Integer
|
||||
// Builds a __m128i from the four Integer values a[0]..a[3] using _mm_set_epi32.
// NOTE(review): two return statements follow one another, so as written only the
// first can execute. This looks like the before/after pair of a diff hunk (the page
// header mentions a corrected integer bug) — confirm which line is the current one.
inline __m128i operator()(Integer *a){
|
||||
return _mm_set_epi32(a[0],a[1],a[2],a[3]);
|
||||
// _mm_set_epi32 takes its arguments highest element first, so this ordering
// places a[0] in the lowest 32-bit element.
return _mm_set_epi32(a[3],a[2],a[1],a[0]);
|
||||
}
|
||||
|
||||
|
||||
@ -181,7 +181,7 @@ namespace Optimization {
|
||||
}
|
||||
// Integer
|
||||
// Integer multiply of two __m128i operands.
// NOTE(review): two return statements follow one another, so as written only the
// first can execute. This looks like the before/after pair of a diff hunk (the page
// header mentions a corrected integer multiplication bug) — confirm which is current.
inline __m128i operator()(__m128i a, __m128i b){
|
||||
// _mm_mul_epi32 multiplies only the low 32 bits of each 64-bit element and
// yields two 64-bit products.
return _mm_mul_epi32(a,b);
|
||||
// _mm_mullo_epi32 multiplies all four 32-bit elements and keeps the low 32 bits
// of each product.
return _mm_mullo_epi32(a,b);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -28,13 +28,19 @@
|
||||
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////
|
||||
// Extract the underlying floating-point type of a real or complex type
|
||||
//////////////////////////////////////
|
||||
// Primary template: for any T that is not matched by a specialization,
// RealPart<T>::type is T itself.
template <typename T> struct RealPart {
|
||||
typedef T type;
|
||||
};
|
||||
// Specialization for std::complex<T>: RealPart<std::complex<T>>::type is the
// component type T.
template <typename T> struct RealPart< std::complex<T> >{
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
//////////////////////////////////////
|
||||
// demote a vector to real type
|
||||
//////////////////////////////////////
|
||||
|
||||
// type alias used to simplify the syntax of std::enable_if
|
||||
template <typename T> using Invoke = typename T::type;
|
||||
@ -90,7 +96,7 @@ namespace Grid {
|
||||
Vector_type v;
|
||||
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)];
|
||||
conv_t_union(){};
|
||||
} conv_t;
|
||||
} conv_t;
|
||||
|
||||
|
||||
Vector_type v;
|
||||
@ -205,7 +211,6 @@ namespace Grid {
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Not all functions are supported
|
||||
// through SIMD and must breakout to
|
||||
@ -214,7 +219,6 @@ namespace Grid {
|
||||
///////////////////////////////////////
|
||||
|
||||
template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) {
|
||||
|
||||
Grid_simd ret;
|
||||
Grid_simd::conv_t conv;
|
||||
|
||||
@ -225,6 +229,19 @@ namespace Grid {
|
||||
ret.v = conv.v;
|
||||
return ret;
|
||||
}
|
||||
// Applies a two-argument functor element by element to a pair of Grid_simd values.
//   func : callable invoked as func(x_i, y_i) for each of the Nsimd() elements
//   x, y : the two operands
// Returns a Grid_simd whose i-th element is func(x_i, y_i).
template<class functor> friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) {
|
||||
Grid_simd ret;
|
||||
Grid_simd::conv_t cx;
|
||||
Grid_simd::conv_t cy;
|
||||
|
||||
// Copy both vectors into conv_t objects so the elements can be indexed through .s[]
cx.v = x.v;
|
||||
cy.v = y.v;
|
||||
for(int i=0;i<Nsimd();i++){
|
||||
// The result overwrites the x element in place; cy is only read
cx.s[i]=func(cx.s[i],cy.s[i]);
|
||||
}
|
||||
ret.v = cx.v;
|
||||
return ret;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// General permute; assumes vector length is same across
|
||||
@ -235,6 +252,7 @@ namespace Grid {
|
||||
{
|
||||
Gpermute<Grid_simd>(y,b,perm);
|
||||
}
|
||||
|
||||
|
||||
};// end of Grid_simd class definition
|
||||
|
||||
@ -383,7 +401,6 @@ namespace Grid {
|
||||
return in;
|
||||
}
|
||||
|
||||
|
||||
/////////////////////
|
||||
// Inner, outer
|
||||
/////////////////////
|
||||
@ -405,6 +422,46 @@ namespace Grid {
|
||||
return arg;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// copy/splat complex real parts into real;
|
||||
// insert real into complex and zero imag;
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
//real = toReal( complex )
|
||||
// Converts a complex Grid_simd into the real Grid_simd with the same vector type V.
// The vector is copied unchanged and then every even-indexed scalar is duplicated
// into the following odd slot.
// NOTE(review): assumes real parts occupy the even slots and imaginary parts the
// odd slots of the complex vector — confirm against the storage layout.
template<class S,class V,IfReal<S> = 0>
|
||||
inline Grid_simd<S,V> toReal(const Grid_simd<std::complex<S>,V> &in)
|
||||
{
|
||||
typedef Grid_simd<S,V> simd;
|
||||
simd ret;
|
||||
typename simd::conv_t conv;
|
||||
conv.v = in.v;
|
||||
// Step by two: each iteration handles one (even, odd) pair of scalars
for(int i=0;i<simd::Nsimd();i+=2){
|
||||
conv.s[i+1]=conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc...
|
||||
}
|
||||
ret.v = conv.v;
|
||||
return ret;
|
||||
}
|
||||
|
||||
//complex = toComplex( real )
|
||||
// Converts a real Grid_simd into the complex Grid_simd with the same vector type V.
// Each (even, odd) pair of scalars must already hold the same value; the odd slot
// is then overwritten with zero.
// The pair check is an assert, so it is not performed when NDEBUG is defined.
template<class R,class V,IfReal<R> = 0 > // must be a real arg
|
||||
inline Grid_simd<std::complex<R>,V> toComplex (const Grid_simd<R,V> &in)
|
||||
{
|
||||
typedef Grid_simd<R,V> Rsimd;
|
||||
typedef Grid_simd<std::complex<R>,V> Csimd;
|
||||
typename Rsimd::conv_t conv;// address as real
|
||||
|
||||
conv.v = in.v;
|
||||
// Step by two: each iteration handles one (even, odd) pair of scalars
for(int i=0;i<Rsimd::Nsimd();i+=2){
|
||||
assert(conv.s[i+1]==conv.s[i]); // trap any cases where real was not duplicated
|
||||
// indicating the SIMD grids of real and imag assignment did not correctly match
|
||||
conv.s[i+1]=0.0; // zero imaginary parts
|
||||
}
|
||||
Csimd ret;
|
||||
ret.v = conv.v;
|
||||
return ret;
|
||||
}
|
||||
|
||||
///////////////////////////////
|
||||
// Define available types
|
||||
///////////////////////////////
|
||||
@ -413,6 +470,20 @@ namespace Grid {
|
||||
typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF;
|
||||
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
|
||||
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
|
||||
|
||||
/////////////////////////////////////////
|
||||
// Some traits to recognise the types
|
||||
/////////////////////////////////////////
|
||||
// Type trait: is_simd<T>::value is false for every T by default ...
template <typename T> struct is_simd : public std::false_type{};
|
||||
// ... and true for each of the five explicitly listed vector types below.
template <> struct is_simd<vRealF> : public std::true_type {};
|
||||
template <> struct is_simd<vRealD> : public std::true_type {};
|
||||
template <> struct is_simd<vComplexF>: public std::true_type {};
|
||||
template <> struct is_simd<vComplexD>: public std::true_type {};
|
||||
template <> struct is_simd<vInteger> : public std::true_type {};
|
||||
|
||||
// enable_if helper: IfSimd<T> names the type int when is_simd<T>::value is true,
// and has no valid type otherwise.
template <typename T> using IfSimd = Invoke<std::enable_if< is_simd<T>::value,int> > ;
|
||||
// Complementary helper: IfNotSimd<T> names the type unsigned when
// is_simd<T>::value is false, and has no valid type otherwise.
template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value,unsigned> > ;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,6 +1,8 @@
|
||||
#ifndef GRID_VECTOR_UNOPS
|
||||
#define GRID_VECTOR_UNOPS
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
template<class scalar> struct SqrtRealFunctor {
|
||||
@ -27,6 +29,28 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
// Unary functor: returns log(real(a)), converted to `scalar`.
// Only the real part of the argument is used.
template<class scalar> struct LogRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return log(real(a));
|
||||
}
|
||||
};
|
||||
|
||||
// Unary functor: returns exp(real(a)), converted to `scalar`.
// Only the real part of the argument is used.
template<class scalar> struct ExpRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return exp(real(a));
|
||||
}
|
||||
};
|
||||
// Unary functor: returns the logical negation !a, converted to `scalar`.
template<class scalar> struct NotFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return (!a);
|
||||
}
|
||||
};
|
||||
// Unary functor: returns std::abs(real(a)), converted to `scalar`.
// Only the real part of the argument is used.
template<class scalar> struct AbsRealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return std::abs(real(a));
|
||||
}
|
||||
};
|
||||
|
||||
template<class scalar> struct PowRealFunctor {
|
||||
double y;
|
||||
PowRealFunctor(double _y) : y(_y) {};
|
||||
@ -43,6 +67,25 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
// Unary functor: returns real(a), converted back to `scalar`.
template<class scalar> struct RealFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return real(a);
|
||||
}
|
||||
};
|
||||
// Unary functor: returns imag(a), converted back to `scalar`.
template<class scalar> struct ImagFunctor {
|
||||
scalar operator()(const scalar &a) const {
|
||||
return imag(a);
|
||||
}
|
||||
};
|
||||
// real() for Grid_simd: hands RealFunctor<S> and r to SimdApply and returns the result.
// NOTE(review): SimdApply presumably applies the functor to each scalar element —
// its full body is not visible here.
template < class S, class V >
|
||||
inline Grid_simd<S,V> real(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(RealFunctor<S>(),r);
|
||||
}
|
||||
// imag() for Grid_simd: hands ImagFunctor<S> and r to SimdApply and returns the result.
// NOTE(review): SimdApply presumably applies the functor to each scalar element —
// its full body is not visible here.
template < class S, class V >
|
||||
inline Grid_simd<S,V> imag(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(ImagFunctor<S>(),r);
|
||||
}
|
||||
|
||||
template < class S, class V >
|
||||
inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(SqrtRealFunctor<S>(),r);
|
||||
@ -60,6 +103,22 @@ namespace Grid {
|
||||
return SimdApply(CosRealFunctor<S>(),r);
|
||||
}
|
||||
// log() for Grid_simd: hands LogRealFunctor<S> and r to SimdApply and returns the result.
// The functor takes the logarithm of the real part of each scalar it is given.
template < class S, class V >
|
||||
inline Grid_simd<S,V> log(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(LogRealFunctor<S>(),r);
|
||||
}
|
||||
// abs() for Grid_simd: hands AbsRealFunctor<S> and r to SimdApply and returns the result.
// The functor takes std::abs of the real part of each scalar it is given.
template < class S, class V >
|
||||
inline Grid_simd<S,V> abs(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(AbsRealFunctor<S>(),r);
|
||||
}
|
||||
// exp() for Grid_simd: hands ExpRealFunctor<S> and r to SimdApply and returns the result.
// The functor exponentiates the real part of each scalar it is given.
template < class S, class V >
|
||||
inline Grid_simd<S,V> exp(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(ExpRealFunctor<S>(),r);
|
||||
}
|
||||
// Not() for Grid_simd: hands NotFunctor<S> and r to SimdApply and returns the result.
// The functor returns the logical negation of each scalar it is given.
template < class S, class V >
|
||||
inline Grid_simd<S,V> Not(const Grid_simd<S,V> &r) {
|
||||
return SimdApply(NotFunctor<S>(),r);
|
||||
}
|
||||
// pow() for Grid_simd: builds a PowRealFunctor<S> from the exponent y and hands it,
// together with r, to SimdApply.
// NOTE(review): the functor's call operator is not visible here; presumably it
// raises each element to the power y — confirm.
template < class S, class V >
|
||||
inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) {
|
||||
return SimdApply(PowRealFunctor<S>(y),r);
|
||||
}
|
||||
@ -67,6 +126,55 @@ namespace Grid {
|
||||
inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) {
|
||||
return SimdApply(ModIntFunctor<S>(y),r);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Allows us to assign into **conformable** real vectors from complex
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// template < class S, class V >
|
||||
// inline auto ComplexRemove(const Grid_simd<S,V> &c) -> Grid_simd<Grid_simd<S,V>::Real,V> {
|
||||
// Grid_simd<Grid_simd<S,V>::Real,V> ret;
|
||||
// ret.v = c.v;
|
||||
// return ret;
|
||||
// }
|
||||
// Binary functor: returns x & y, converted to `scalar`.
template<class scalar> struct AndFunctor {
|
||||
scalar operator()(const scalar &x, const scalar &y) const {
|
||||
return x & y;
|
||||
}
|
||||
};
|
||||
// Binary functor: returns x | y, converted to `scalar`.
template<class scalar> struct OrFunctor {
|
||||
scalar operator()(const scalar &x, const scalar &y) const {
|
||||
return x | y;
|
||||
}
|
||||
};
|
||||
// Binary functor: returns the logical conjunction x && y, converted to `scalar`.
template<class scalar> struct AndAndFunctor {
|
||||
scalar operator()(const scalar &x, const scalar &y) const {
|
||||
return x && y;
|
||||
}
|
||||
};
|
||||
// Binary functor: returns the logical disjunction x || y, converted to `scalar`.
template<class scalar> struct OrOrFunctor {
|
||||
scalar operator()(const scalar &x, const scalar &y) const {
|
||||
return x || y;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////
|
||||
// Calls to simd binop functors
|
||||
////////////////////////////////
|
||||
// operator& for Grid_simd: passes AndFunctor<S> with x and y to SimdApplyBinop,
// giving an element-by-element x & y.
template < class S, class V >
|
||||
inline Grid_simd<S,V> operator &(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
||||
return SimdApplyBinop(AndFunctor<S>(),x,y);
|
||||
}
|
||||
// operator&& for Grid_simd: passes AndAndFunctor<S> with x and y to SimdApplyBinop,
// giving an element-by-element x && y.
// Being an overloaded operator, it evaluates both operands (no short-circuiting).
template < class S, class V >
|
||||
inline Grid_simd<S,V> operator &&(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
||||
return SimdApplyBinop(AndAndFunctor<S>(),x,y);
|
||||
}
|
||||
// operator| for Grid_simd: passes OrFunctor<S> with x and y to SimdApplyBinop,
// giving an element-by-element x | y.
template < class S, class V >
|
||||
inline Grid_simd<S,V> operator |(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
||||
return SimdApplyBinop(OrFunctor<S>(),x,y);
|
||||
}
|
||||
// operator|| for Grid_simd: passes OrOrFunctor<S> with x and y to SimdApplyBinop,
// giving an element-by-element x || y.
// Being an overloaded operator, it evaluates both operands (no short-circuiting).
template < class S, class V >
|
||||
inline Grid_simd<S,V> operator ||(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
|
||||
return SimdApplyBinop(OrOrFunctor<S>(),x,y);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user