1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-12 20:27:06 +01:00

Corrected bug in integer multiplications for SSE4 and AVX2

Merge remote-tracking branch 'upstream/master'

Conflicts:
	tests/Make.inc
This commit is contained in:
neo
2015-06-16 23:34:45 +09:00
37 changed files with 1341 additions and 515 deletions

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-06-09 14:26:59 neo>
// Time-stamp: <2015-06-16 23:30:41 neo>
//----------------------------------------------------------------------
#include <immintrin.h>
@ -248,7 +248,7 @@ namespace Optimization {
return _mm256_set_m128i(a1,a0);
#endif
#if defined (AVX2)
return _mm256_mul_epi32(a,b);
return _mm256_mullo_epi32(a,b);
#endif
}

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-06-09 14:24:01 neo>
// Time-stamp: <2015-06-16 23:27:54 neo>
//----------------------------------------------------------------------
#include <pmmintrin.h>
@ -97,7 +97,7 @@ namespace Optimization {
}
// Integer
inline __m128i operator()(Integer *a){
// NOTE(review): this is a diff hunk rendered without +/- markers; the first
// return appears to be the line removed by the commit and the second its
// replacement -- confirm against the repository before editing.
// _mm_set_epi32 takes its arguments from the highest 32-bit lane down to the
// lowest, so the second form places a[0] in lane 0 and a[3] in lane 3.
return _mm_set_epi32(a[0],a[1],a[2],a[3]);
return _mm_set_epi32(a[3],a[2],a[1],a[0]);
}
@ -181,7 +181,7 @@ namespace Optimization {
}
// Integer
inline __m128i operator()(__m128i a, __m128i b){
// NOTE(review): this is a diff hunk rendered without +/- markers; the first
// return appears to be the line removed by the commit and the second its
// replacement -- confirm against the repository before editing.
// _mm_mul_epi32 multiplies only the low signed 32-bit integer of each 64-bit
// element and yields two 64-bit products, whereas _mm_mullo_epi32 multiplies
// all four 32-bit lanes and keeps the low 32 bits of each product.
return _mm_mul_epi32(a,b);
return _mm_mullo_epi32(a,b);
}
};

View File

@ -28,13 +28,19 @@
namespace Grid {
//////////////////////////////////////
// To take the floating point type of real/complex type
//////////////////////////////////////
// Maps a scalar type onto its underlying real type.
// Primary template: any non-complex T is its own real part.
template <typename T>
struct RealPart {
  using type = T;
};

// Specialisation: std::complex<T> reduces to its component type T.
template <typename T>
struct RealPart< std::complex<T> > {
  using type = T;
};
//////////////////////////////////////
// demote a vector to real type
//////////////////////////////////////
// type alias used to simplify the syntax of std::enable_if
template <typename T> using Invoke = typename T::type;
@ -90,7 +96,7 @@ namespace Grid {
Vector_type v;
Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)];
conv_t_union(){};
} conv_t;
} conv_t;
Vector_type v;
@ -205,7 +211,6 @@ namespace Grid {
return *this;
}
///////////////////////////////////////
// Not all functions are supported
// through SIMD and must breakout to
@ -214,7 +219,6 @@ namespace Grid {
///////////////////////////////////////
template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) {
Grid_simd ret;
Grid_simd::conv_t conv;
@ -225,6 +229,19 @@ namespace Grid {
ret.v = conv.v;
return ret;
}
// Scalar fallback for binary operations: unpacks both operands through
// conv_t, applies func to each pair of corresponding scalar entries, and
// repacks the results into a Grid_simd.
template<class functor> friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) {
  Grid_simd::conv_t lhs;
  Grid_simd::conv_t rhs;
  lhs.v = x.v;
  rhs.v = y.v;
  // Results are accumulated in place in the left-hand buffer.
  for(int lane=0;lane<Nsimd();lane++){
    lhs.s[lane]=func(lhs.s[lane],rhs.s[lane]);
  }
  Grid_simd result;
  result.v = lhs.v;
  return result;
}
////////////////////////////////////////////////////////////////////
// General permute; assumes vector length is same across
@ -235,6 +252,7 @@ namespace Grid {
{
Gpermute<Grid_simd>(y,b,perm);
}
};// end of Grid_simd class definition
@ -383,7 +401,6 @@ namespace Grid {
return in;
}
/////////////////////
// Inner, outer
/////////////////////
@ -405,6 +422,46 @@ namespace Grid {
return arg;
}
////////////////////////////////////////////////////////////
// copy/splat complex real parts into real;
// insert real into complex and zero imag;
////////////////////////////////////////////////////////////
// real = toReal( complex )
// Reinterprets the complex vector as a vector of reals and copies every
// even-indexed entry over the following odd-indexed one, so each pair
// becomes (r,r).
template<class S,class V,IfReal<S> = 0>
inline Grid_simd<S,V> toReal(const Grid_simd<std::complex<S>,V> &in)
{
  using RealVec = Grid_simd<S,V>;
  typename RealVec::conv_t lanes;
  lanes.v = in.v;
  for(int re=0; re<RealVec::Nsimd(); re+=2){
    const int im = re+1;
    lanes.s[im] = lanes.s[re]; // duplicate (r,r);(r,r);(r,r); etc...
  }
  RealVec out;
  out.v = lanes.v;
  return out;
}
// complex = toComplex( real )
// Reinterprets the real vector as complex: each pair of consecutive entries
// must already hold the same value; the second entry of each pair is then
// zeroed to become the imaginary part.
template<class R,class V,IfReal<R> = 0 > // must be a real arg
inline Grid_simd<std::complex<R>,V> toComplex (const Grid_simd<R,V> &in)
{
  using RealVec    = Grid_simd<R,V>;
  using ComplexVec = Grid_simd<std::complex<R>,V>;

  typename RealVec::conv_t conv; // address as real
  conv.v = in.v;

  for(int i=0; i<RealVec::Nsimd(); i+=2){
    // Trap any case where the real value was not duplicated, indicating the
    // SIMD grids of the real and imaginary assignment did not match.
    assert(conv.s[i+1]==conv.s[i]);
    conv.s[i+1]=0.0; // zero imaginary parts
  }

  ComplexVec result;
  result.v = conv.v;
  return result;
}
///////////////////////////////
// Define available types
///////////////////////////////
@ -413,6 +470,20 @@ namespace Grid {
typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF;
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
/////////////////////////////////////////
// Some traits to recognise the types
/////////////////////////////////////////
// Primary template: an arbitrary type is not one of the SIMD vector types.
template <typename T>
struct is_simd : public std::false_type {};

// The five vector typedefs are the only types recognised as SIMD.
template <> struct is_simd<vRealF>    : public std::true_type {};
template <> struct is_simd<vRealD>    : public std::true_type {};
template <> struct is_simd<vComplexF> : public std::true_type {};
template <> struct is_simd<vComplexD> : public std::true_type {};
template <> struct is_simd<vInteger>  : public std::true_type {};

// SFINAE helpers: IfSimd<T> is int when T is a SIMD type, IfNotSimd<T> is
// unsigned when it is not; otherwise substitution fails.
template <typename T>
using IfSimd = typename std::enable_if< is_simd<T>::value, int >::type;
template <typename T>
using IfNotSimd = typename std::enable_if< !is_simd<T>::value, unsigned >::type;
}
#endif

View File

@ -1,6 +1,8 @@
#ifndef GRID_VECTOR_UNOPS
#define GRID_VECTOR_UNOPS
#include <cmath>
namespace Grid {
template<class scalar> struct SqrtRealFunctor {
@ -27,6 +29,28 @@ namespace Grid {
}
};
// Scalar functor: log of the real part of its argument, converted back to
// the scalar type.
template<class scalar> struct LogRealFunctor {
  scalar operator()(const scalar &a) const {
    const auto re = real(a);
    return log(re);
  }
};
// Scalar functor: exp of the real part of its argument, converted back to
// the scalar type.
template<class scalar> struct ExpRealFunctor {
  scalar operator()(const scalar &a) const {
    const auto re = real(a);
    return exp(re);
  }
};
// Scalar functor: logical negation of its argument, converted to the scalar
// type.
template<class scalar> struct NotFunctor {
  scalar operator()(const scalar &a) const {
    scalar negated = !a;
    return negated;
  }
};
// Scalar functor: absolute value of the real part of its argument, converted
// back to the scalar type.
template<class scalar> struct AbsRealFunctor {
  scalar operator()(const scalar &a) const {
    const auto re = real(a);
    return std::abs(re);
  }
};
template<class scalar> struct PowRealFunctor {
double y;
PowRealFunctor(double _y) : y(_y) {};
@ -43,6 +67,25 @@ namespace Grid {
}
};
// Scalar functor: the result of real(a), converted to the scalar type.
template<class scalar> struct RealFunctor {
  scalar operator()(const scalar &a) const {
    scalar re = real(a);
    return re;
  }
};
// Scalar functor: the result of imag(a), converted to the scalar type.
template<class scalar> struct ImagFunctor {
  scalar operator()(const scalar &a) const {
    scalar im = imag(a);
    return im;
  }
};
// Forwards the vector to SimdApply with RealFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> real(const Grid_simd<S,V> &r) {
  RealFunctor<S> op;
  return SimdApply(op,r);
}
// Forwards the vector to SimdApply with ImagFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> imag(const Grid_simd<S,V> &r) {
  ImagFunctor<S> op;
  return SimdApply(op,r);
}
template < class S, class V >
inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) {
return SimdApply(SqrtRealFunctor<S>(),r);
@ -60,6 +103,22 @@ namespace Grid {
return SimdApply(CosRealFunctor<S>(),r);
}
// Forwards the vector to SimdApply with LogRealFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> log(const Grid_simd<S,V> &r) {
  LogRealFunctor<S> op;
  return SimdApply(op,r);
}
// Forwards the vector to SimdApply with AbsRealFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> abs(const Grid_simd<S,V> &r) {
  AbsRealFunctor<S> op;
  return SimdApply(op,r);
}
// Forwards the vector to SimdApply with ExpRealFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> exp(const Grid_simd<S,V> &r) {
  ExpRealFunctor<S> op;
  return SimdApply(op,r);
}
// Forwards the vector to SimdApply with NotFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> Not(const Grid_simd<S,V> &r) {
  NotFunctor<S> op;
  return SimdApply(op,r);
}
// Forwards the vector to SimdApply with a PowRealFunctor<S> built from the
// exponent y.
template < class S, class V >
inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) {
  PowRealFunctor<S> op(y);
  return SimdApply(op,r);
}
@ -67,6 +126,55 @@ namespace Grid {
inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) {
return SimdApply(ModIntFunctor<S>(y),r);
}
////////////////////////////////////////////////////////////////////////////
// Allows us to assign into **conformable** real vectors from complex
////////////////////////////////////////////////////////////////////////////
// template < class S, class V >
// inline auto ComplexRemove(const Grid_simd<S,V> &c) -> Grid_simd<Grid_simd<S,V>::Real,V> {
// Grid_simd<Grid_simd<S,V>::Real,V> ret;
// ret.v = c.v;
// return ret;
// }
// Scalar functor: bitwise AND of its two arguments.
template<class scalar> struct AndFunctor {
  auto operator()(const scalar &x, const scalar &y) const -> scalar {
    scalar bits = x & y;
    return bits;
  }
};
// Scalar functor: bitwise OR of its two arguments.
template<class scalar> struct OrFunctor {
  auto operator()(const scalar &x, const scalar &y) const -> scalar {
    scalar bits = x | y;
    return bits;
  }
};
// Scalar functor: logical AND of its two arguments, converted to the scalar
// type.
template<class scalar> struct AndAndFunctor {
  auto operator()(const scalar &x, const scalar &y) const -> scalar {
    scalar both = x && y;
    return both;
  }
};
// Scalar functor: logical OR of its two arguments, converted to the scalar
// type.
template<class scalar> struct OrOrFunctor {
  auto operator()(const scalar &x, const scalar &y) const -> scalar {
    scalar either = x || y;
    return either;
  }
};
////////////////////////////////
// Calls to simd binop functors
////////////////////////////////
// Forwards both vectors to SimdApplyBinop with AndFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> operator &(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
  AndFunctor<S> op;
  return SimdApplyBinop(op,x,y);
}
// Forwards both vectors to SimdApplyBinop with AndAndFunctor<S>.
// As an overloaded operator, both operands are always evaluated.
template < class S, class V >
inline Grid_simd<S,V> operator &&(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
  AndAndFunctor<S> op;
  return SimdApplyBinop(op,x,y);
}
// Forwards both vectors to SimdApplyBinop with OrFunctor<S>.
template < class S, class V >
inline Grid_simd<S,V> operator |(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
  OrFunctor<S> op;
  return SimdApplyBinop(op,x,y);
}
// Forwards both vectors to SimdApplyBinop with OrOrFunctor<S>.
// As an overloaded operator, both operands are always evaluated.
template < class S, class V >
inline Grid_simd<S,V> operator ||(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
  OrOrFunctor<S> op;
  return SimdApplyBinop(op,x,y);
}
}
#endif