mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Corrected bug in integer multiplications for SSE4 and AVX2
Merge remote-tracking branch 'upstream/master' Conflicts: tests/Make.inc
This commit is contained in:
		@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-06-09 14:26:59 neo>
 | 
			
		||||
// Time-stamp: <2015-06-16 23:30:41 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
@@ -248,7 +248,7 @@ namespace Optimization {
 | 
			
		||||
      return _mm256_set_m128i(a1,a0);
 | 
			
		||||
#endif
 | 
			
		||||
#if defined (AVX2)
 | 
			
		||||
      return _mm256_mul_epi32(a,b);
 | 
			
		||||
      return _mm256_mullo_epi32(a,b);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-06-09 14:24:01 neo>
 | 
			
		||||
// Time-stamp: <2015-06-16 23:27:54 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <pmmintrin.h>
 | 
			
		||||
@@ -97,7 +97,7 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    // Pack four Integers read from `a` into one __m128i.
    // _mm_set_epi32 takes its arguments from the highest 32-bit lane down to
    // the lowest, so a[0] is passed last in order to land in lane 0 and keep
    // the register layout in the same order as the array.
    inline __m128i operator()(Integer *a){
      return _mm_set_epi32(a[3],a[2],a[1],a[0]);
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -181,7 +181,7 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    // Lane-wise product of the four packed 32-bit integers in a and b.
    // _mm_mullo_epi32 keeps the low 32 bits of each of the four products.
    // _mm_mul_epi32 must not be used here: it multiplies only the low 32-bit
    // half of each 64-bit element and yields two 64-bit results.
    inline __m128i operator()(__m128i a, __m128i b){
      return _mm_mullo_epi32(a,b);
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -28,13 +28,19 @@
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////
 | 
			
		||||
  // To take the floating point type of real/complex type
 | 
			
		||||
  //////////////////////////////////////
 | 
			
		||||
  // Primary template: for any T that is not std::complex, the "real part"
  // type is T itself.
  template <typename T> struct RealPart {
    typedef T type;
  };
  // Specialisation: std::complex<T> maps to its component type T.
  template <typename T> struct RealPart< std::complex<T> >{
    typedef T type;
  };
 | 
			
		||||
  
 | 
			
		||||
  //////////////////////////////////////
 | 
			
		||||
  // demote a vector to real type
 | 
			
		||||
  //////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  // type alias used to simplify the syntax of std::enable_if
 | 
			
		||||
  // Invoke<T> is shorthand for `typename T::type`.
  template <typename T> using Invoke                                  =  typename T::type;
 | 
			
		||||
@@ -90,7 +96,7 @@ namespace Grid {
 | 
			
		||||
	Vector_type v;
 | 
			
		||||
	Scalar_type s[sizeof(Vector_type)/sizeof(Scalar_type)];
 | 
			
		||||
      conv_t_union(){};
 | 
			
		||||
      } conv_t;
 | 
			
		||||
    } conv_t;
 | 
			
		||||
    
 | 
			
		||||
   
 | 
			
		||||
    Vector_type v;
 | 
			
		||||
@@ -205,7 +211,6 @@ namespace Grid {
 | 
			
		||||
      return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
    // Not all functions are supported
 | 
			
		||||
    // through SIMD and must break out to 
 | 
			
		||||
@@ -214,7 +219,6 @@ namespace Grid {
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    template<class functor> friend inline Grid_simd SimdApply (const functor &func,const Grid_simd &v) {
 | 
			
		||||
 | 
			
		||||
      Grid_simd ret;
 | 
			
		||||
      Grid_simd::conv_t conv;
 | 
			
		||||
 | 
			
		||||
@@ -225,6 +229,19 @@ namespace Grid {
 | 
			
		||||
      ret.v = conv.v;
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
    // Apply a scalar binary functor slot by slot.
    // Both operands are copied into conv_t unions so their contents can be
    // addressed as scalars through `s`; func is called on each pair of slots
    // and its result overwrites the slot in x's copy, which is then returned
    // as a vector.
    template<class functor> friend inline Grid_simd SimdApplyBinop (const functor &func,const Grid_simd &x,const Grid_simd &y) {
      Grid_simd ret;
      Grid_simd::conv_t cx;
      Grid_simd::conv_t cy;

      cx.v = x.v;
      cy.v = y.v;
      for(int i=0;i<Nsimd();i++){
        cx.s[i]=func(cx.s[i],cy.s[i]);
      }
      ret.v = cx.v;
      return ret;
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // General permute; assumes vector length is same across 
 | 
			
		||||
@@ -235,6 +252,7 @@ namespace Grid {
 | 
			
		||||
    {
 | 
			
		||||
      Gpermute<Grid_simd>(y,b,perm);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
  };// end of Grid_simd class definition 
 | 
			
		||||
 | 
			
		||||
@@ -383,7 +401,6 @@ namespace Grid {
 | 
			
		||||
    return in;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /////////////////////
 | 
			
		||||
  // Inner, outer
 | 
			
		||||
  /////////////////////
 | 
			
		||||
@@ -405,6 +422,46 @@ namespace Grid {
 | 
			
		||||
    return arg;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
  // copy/splat complex real parts into real;
 | 
			
		||||
  // insert real into complex and zero imag;
 | 
			
		||||
  ////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  //real = toReal( complex )
 | 
			
		||||
  template<class S,class V,IfReal<S>  = 0>	
 | 
			
		||||
  inline Grid_simd<S,V> toReal(const Grid_simd<std::complex<S>,V> &in)
 | 
			
		||||
  {
 | 
			
		||||
    typedef Grid_simd<S,V> simd;
 | 
			
		||||
    simd ret;
 | 
			
		||||
    typename simd::conv_t conv;
 | 
			
		||||
    conv.v = in.v;
 | 
			
		||||
    for(int i=0;i<simd::Nsimd();i+=2){
 | 
			
		||||
      conv.s[i+1]=conv.s[i];    // duplicate (r,r);(r,r);(r,r); etc...
 | 
			
		||||
    }
 | 
			
		||||
    ret.v = conv.v;
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  //complex = toComplex( real )
 | 
			
		||||
  template<class R,class V,IfReal<R> = 0 >	// must be a real arg
 | 
			
		||||
  inline Grid_simd<std::complex<R>,V> toComplex (const Grid_simd<R,V> &in)
 | 
			
		||||
  {
 | 
			
		||||
    typedef Grid_simd<R,V> Rsimd;
 | 
			
		||||
    typedef Grid_simd<std::complex<R>,V> Csimd;
 | 
			
		||||
    typename Rsimd::conv_t conv;// address as real
 | 
			
		||||
    
 | 
			
		||||
    conv.v = in.v;
 | 
			
		||||
    for(int i=0;i<Rsimd::Nsimd();i+=2){
 | 
			
		||||
      assert(conv.s[i+1]==conv.s[i]); // trap any cases where real was not duplicated 
 | 
			
		||||
      // indicating the SIMD grids of real and imag assignment did not correctly match
 | 
			
		||||
      conv.s[i+1]=0.0;                // zero imaginary parts
 | 
			
		||||
    }
 | 
			
		||||
    Csimd ret;
 | 
			
		||||
    ret.v = conv.v;
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////
 | 
			
		||||
  // Define available types
 | 
			
		||||
  ///////////////////////////////
 | 
			
		||||
@@ -413,6 +470,20 @@ namespace Grid {
 | 
			
		||||
  // Grid_simd instantiations for complex<float>, complex<double> and Integer
  // scalars, each paired with the matching SIMD_*type vector type.
  typedef Grid_simd< std::complex< float > , SIMD_Ftype > vComplexF;
  typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
  typedef Grid_simd< Integer               , SIMD_Itype > vInteger;
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////
 | 
			
		||||
  // Some traits to recognise the types
 | 
			
		||||
  /////////////////////////////////////////
 | 
			
		||||
  template <typename T> struct is_simd : public std::false_type{};
 | 
			
		||||
  template <> struct is_simd<vRealF>   : public std::true_type {};
 | 
			
		||||
  template <> struct is_simd<vRealD>   : public std::true_type {};
 | 
			
		||||
  template <> struct is_simd<vComplexF>: public std::true_type {};
 | 
			
		||||
  template <> struct is_simd<vComplexD>: public std::true_type {};
 | 
			
		||||
  template <> struct is_simd<vInteger> : public std::true_type {};
 | 
			
		||||
 | 
			
		||||
  // Names `int` when is_simd<T>::value is true; otherwise std::enable_if has
  // no ::type member and the alias fails to substitute.
  template <typename T> using IfSimd     = Invoke<std::enable_if< is_simd<T>::value,int> > ;
 | 
			
		||||
  // Names `unsigned` when is_simd<T>::value is false; otherwise std::enable_if
  // has no ::type member and the alias fails to substitute.
  template <typename T> using IfNotSimd  = Invoke<std::enable_if<!is_simd<T>::value,unsigned> > ;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
#ifndef GRID_VECTOR_UNOPS
 | 
			
		||||
#define GRID_VECTOR_UNOPS
 | 
			
		||||
 | 
			
		||||
#include <cmath>
 | 
			
		||||
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
  template<class scalar> struct SqrtRealFunctor {
 | 
			
		||||
@@ -27,6 +29,28 @@ namespace Grid {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Scalar functor: returns log(real(a)) converted back to `scalar`.
  template<class scalar> struct LogRealFunctor {
    scalar operator()(const scalar &a)  const {
      return log(real(a));
    }
  };
 | 
			
		||||
 | 
			
		||||
  // Scalar functor: returns exp(real(a)) converted back to `scalar`.
  template<class scalar> struct ExpRealFunctor {
    scalar operator()(const scalar &a)  const {
      return exp(real(a));
    }
  };
 | 
			
		||||
  // Scalar functor: logical negation of a, with the bool result converted
  // back to `scalar`.
  template<class scalar> struct NotFunctor {
    scalar operator()(const scalar &a)  const {
      return (!a);
    }
  };
 | 
			
		||||
  // Scalar functor: returns std::abs(real(a)) converted back to `scalar`.
  template<class scalar> struct AbsRealFunctor {
    scalar operator()(const scalar &a)  const {
      return std::abs(real(a));
    }
  };
 | 
			
		||||
 | 
			
		||||
  template<class scalar> struct PowRealFunctor {
 | 
			
		||||
    double y;
 | 
			
		||||
  PowRealFunctor(double _y) : y(_y) {};
 | 
			
		||||
@@ -43,6 +67,25 @@ namespace Grid {
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Scalar functor: returns real(a) converted back to `scalar`.
  template<class scalar> struct RealFunctor {
    scalar operator()(const scalar &a)  const {
      return real(a);
    }
  };
 | 
			
		||||
  // Scalar functor: returns imag(a) converted back to `scalar`.
  template<class scalar> struct ImagFunctor {
    scalar operator()(const scalar &a)  const {
      return imag(a);
    }
  };
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> real(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(RealFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> imag(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(ImagFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> sqrt(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(SqrtRealFunctor<S>(),r);
 | 
			
		||||
@@ -60,6 +103,22 @@ namespace Grid {
 | 
			
		||||
    return SimdApply(CosRealFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> log(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(LogRealFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> abs(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(AbsRealFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> exp(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(ExpRealFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> Not(const Grid_simd<S,V> &r) {
 | 
			
		||||
    return SimdApply(NotFunctor<S>(),r);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> pow(const Grid_simd<S,V> &r,double y) {
 | 
			
		||||
    return SimdApply(PowRealFunctor<S>(y),r);
 | 
			
		||||
  }
 | 
			
		||||
@@ -67,6 +126,55 @@ namespace Grid {
 | 
			
		||||
  inline Grid_simd<S,V> mod(const Grid_simd<S,V> &r,Integer y) {
 | 
			
		||||
    return SimdApply(ModIntFunctor<S>(y),r);
 | 
			
		||||
  }
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Allows us to assign into **conformable** real vectors from complex
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  //  template < class S, class V > 
 | 
			
		||||
  //  inline auto ComplexRemove(const Grid_simd<S,V> &c) -> Grid_simd<Grid_simd<S,V>::Real,V> {
 | 
			
		||||
  //    Grid_simd<Grid_simd<S,V>::Real,V> ret;
 | 
			
		||||
  //    ret.v = c.v;
 | 
			
		||||
  //    return ret;
 | 
			
		||||
  //  }
 | 
			
		||||
  // Scalar functor: bitwise AND of x and y.
  template<class scalar> struct AndFunctor {
    scalar operator()(const scalar &x, const scalar &y)  const {
      return x & y;
    }
  };
 | 
			
		||||
  // Scalar functor: bitwise OR of x and y.
  template<class scalar> struct OrFunctor {
    scalar operator()(const scalar &x, const scalar &y)  const {
      return x | y;
    }
  };
 | 
			
		||||
  // Scalar functor: logical AND of x and y, with the bool result converted
  // back to `scalar`.
  template<class scalar> struct AndAndFunctor {
    scalar operator()(const scalar &x, const scalar &y)  const {
      return x && y;
    }
  };
 | 
			
		||||
  // Scalar functor: logical OR of x and y, with the bool result converted
  // back to `scalar`.
  template<class scalar> struct OrOrFunctor {
    scalar operator()(const scalar &x, const scalar &y)  const {
      return x || y;
    }
  };
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  // Calls to simd binop functors
 | 
			
		||||
  ////////////////////////////////
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> operator &(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
 | 
			
		||||
    return SimdApplyBinop(AndFunctor<S>(),x,y);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> operator &&(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
 | 
			
		||||
    return SimdApplyBinop(AndAndFunctor<S>(),x,y);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> operator |(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
 | 
			
		||||
    return SimdApplyBinop(OrFunctor<S>(),x,y);
 | 
			
		||||
  }
 | 
			
		||||
  template < class S, class V > 
 | 
			
		||||
  inline Grid_simd<S,V> operator ||(const Grid_simd<S,V> &x,const Grid_simd<S,V> &y) {
 | 
			
		||||
    return SimdApplyBinop(OrOrFunctor<S>(),x,y);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user