mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-02 21:14:32 +00:00 
			
		
		
		
	Merge remote-tracking branch 'upstream/master'
Conflicts: lib/math/Grid_math_tensors.h lib/simd/Grid_vector_types.h
This commit is contained in:
		@@ -283,6 +283,7 @@ namespace Optimization {
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Here assign types 
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  typedef __m128 SIMD_Ftype;  // Single precision type
 | 
			
		||||
  typedef __m128d SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef __m128i SIMD_Itype; // Integer type
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
/*! @file Grid_vector_types.h
 | 
			
		||||
  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-22 17:08:19 neo>
 | 
			
		||||
// Time-stamp: <2015-05-26 12:05:39 neo>
 | 
			
		||||
//---------------------------------------------------------------------------
 | 
			
		||||
#ifndef GRID_VECTOR_TYPES
 | 
			
		||||
#define GRID_VECTOR_TYPES
 | 
			
		||||
@@ -21,31 +21,24 @@
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  // Extracts the underlying floating-point type from a real or complex type
 | 
			
		||||
  template <typename T> 
 | 
			
		||||
    struct RealPart {
 | 
			
		||||
      typedef T type;
 | 
			
		||||
    };
 | 
			
		||||
  template <typename T> 
 | 
			
		||||
    struct RealPart< std::complex<T> >{
 | 
			
		||||
  // RealPart<T>::type is T itself for any T that is not a std::complex ...
  template <typename T>
  struct RealPart {
    using type = T;
  };
  // ... and is the component type T when instantiated with std::complex<T>.
  template <typename T>
  struct RealPart<std::complex<T>> {
    using type = T;
  };
 | 
			
		||||
 | 
			
		||||
  // type alias used to simplify the syntax of std::enable_if
 | 
			
		||||
  template <typename T> using Invoke =
 | 
			
		||||
    typename T::type;
 | 
			
		||||
  template <typename Condition, typename ReturnType> using EnableIf =
 | 
			
		||||
    Invoke<std::enable_if<Condition::value, ReturnType>>;
 | 
			
		||||
  template <typename Condition, typename ReturnType> using NotEnableIf =
 | 
			
		||||
    Invoke<std::enable_if<!Condition::value, ReturnType>>;
 | 
			
		||||
  // Invoke<T> names the nested member type T::type.
  template <typename T>
  using Invoke = typename T::type;

  // EnableIf<Condition, ReturnType> is ReturnType when Condition::value is true;
  // otherwise the alias has no type and substitution fails.
  template <typename Condition, typename ReturnType>
  using EnableIf = typename std::enable_if<Condition::value, ReturnType>::type;

  // NotEnableIf<Condition, ReturnType> is ReturnType when Condition::value is false;
  // otherwise the alias has no type and substitution fails.
  template <typename Condition, typename ReturnType>
  using NotEnableIf = typename std::enable_if<!Condition::value, ReturnType>::type;
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Check whether a type is complex (std::complex<T>) using type traits
 | 
			
		||||
  template <typename T> 
 | 
			
		||||
    struct is_complex : std::false_type {};
 | 
			
		||||
  template < typename T > 
 | 
			
		||||
    struct is_complex< std::complex<T> >: std::true_type {};
 | 
			
		||||
  // is_complex<T>::value is false for an arbitrary T ...
  template <typename T>
  struct is_complex : std::false_type {};
  // ... and true when T is some std::complex<U>.
  template <typename T>
  struct is_complex<std::complex<T>> : std::true_type {};
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
  // Define the operation templates functors
 | 
			
		||||
  // general forms to allow for vsplat syntax
 | 
			
		||||
@@ -102,8 +95,6 @@ namespace Grid {
 | 
			
		||||
    Grid_simd(const Real a){
 | 
			
		||||
      vsplat(*this,Scalar_type(a));
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
       
 | 
			
		||||
    ///////////////////////////////////////////////
 | 
			
		||||
    // mac, mult, sub, add, adj
 | 
			
		||||
@@ -145,10 +136,6 @@ namespace Grid {
 | 
			
		||||
      friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
 | 
			
		||||
    template <  class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 > 
 | 
			
		||||
      friend inline void vfalse(Grid_simd &ret){vsplat(ret,0);}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   
 | 
			
		||||
 
 | 
			
		||||
   
 | 
			
		||||
    ////////////////////////////////////
 | 
			
		||||
    // Arithmetic operator overloads +,-,*
 | 
			
		||||
@@ -184,7 +171,6 @@ namespace Grid {
 | 
			
		||||
	ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
 | 
			
		||||
	return ret;
 | 
			
		||||
      };
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // FIXME:  gonna remove these load/store, get, set, prefetch
 | 
			
		||||
 
 | 
			
		||||
@@ -345,20 +345,30 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
 | 
			
		||||
// REDUCE -- FIXME: there must be a cleaner implementation
 | 
			
		||||
       friend inline ComplexD Reduce(const vComplexD & in)
 | 
			
		||||
       { 
 | 
			
		||||
	 vComplexD v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   zvec v;
 | 
			
		||||
	   double f[sizeof(zvec)/sizeof(double)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
	   
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 return ComplexD(in.v[0],in.v[1]);
 | 
			
		||||
	 v1=in;
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vComplexD v1;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 return ComplexD(v1.v[0],v1.v[1]);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
	 return ComplexD(_mm512_mask_reduce_add_pd(0x55, in.v),_mm512_mask_reduce_add_pd(0xAA, in.v));
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); // avx 256; quad complex single
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif 
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#error
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v = v1.v;
 | 
			
		||||
	 return ComplexD(conv.f[0],conv.f[1]);
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        // Unary negation
 | 
			
		||||
 
 | 
			
		||||
@@ -234,26 +234,34 @@ namespace Grid {
 | 
			
		||||
	}
 | 
			
		||||
	friend inline ComplexF Reduce(const vComplexF & in)
 | 
			
		||||
	{
 | 
			
		||||
	 vComplexF v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   cvec v;
 | 
			
		||||
	   float f[sizeof(cvec)/sizeof(float)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 vComplexF v1;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 return ComplexF(v1.v[0],v1.v[1]);
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vComplexF v1,v2;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); // avx 256; quad complex single
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return ComplexF(v1.v[0],v1.v[1]);
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
            return ComplexF(_mm512_mask_reduce_add_ps(0x5555, in.v),_mm512_mask_reduce_add_ps(0xAAAA, in.v));
 | 
			
		||||
	 permute(v1,in,0); // avx512 octo-complex single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#error
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v = v1.v;
 | 
			
		||||
	 return ComplexF(conv.f[0],conv.f[1]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        friend inline vComplexF operator * (const ComplexF &a, vComplexF b){
 | 
			
		||||
 
 | 
			
		||||
@@ -210,25 +210,33 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
       friend inline RealD Reduce(const vRealD & in)
 | 
			
		||||
       {
 | 
			
		||||
	 vRealD v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   dvec v;
 | 
			
		||||
	   double f[sizeof(dvec)/sizeof(double)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 vRealD v1;
 | 
			
		||||
	 permute(v1,in,0); // sse 128; paired real double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 return RealD(v1.v[0]);
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vRealD v1,v2;
 | 
			
		||||
	 permute(v1,in,0); // avx 256; quad double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return v1.v[0];
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
            return _mm512_reduce_add_pd(in.v);
 | 
			
		||||
	 permute(v1,in,0); // avx 512; octo-double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v=v1.v;
 | 
			
		||||
	 return conv.f[0];
 | 
			
		||||
       }
 | 
			
		||||
 | 
			
		||||
        // *=,+=,-= operators
 | 
			
		||||
 
 | 
			
		||||
@@ -243,29 +243,39 @@ friend inline void vstore(const vRealF &ret, float *a){
 | 
			
		||||
        }
 | 
			
		||||
       friend inline RealF Reduce(const vRealF & in)
 | 
			
		||||
       {
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 vRealF v1,v2;
 | 
			
		||||
	 union { 
 | 
			
		||||
	   fvec v;
 | 
			
		||||
	   float f[sizeof(fvec)/sizeof(double)];
 | 
			
		||||
	 } conv;
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
	 permute(v1,in,0); // sse 128; quad single
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return v1.v[0];
 | 
			
		||||
#endif
 | 
			
		||||
#if defined(AVX1) || defined (AVX2)
 | 
			
		||||
	 vRealF v1,v2;
 | 
			
		||||
	 permute(v1,in,0); // avx 256; octo-double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 return v1.v[0];
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
            return _mm512_reduce_add_ps(in.v);
 | 
			
		||||
	 permute(v1,in,0); // avx 256; octo-double
 | 
			
		||||
	 v1=v1+in;
 | 
			
		||||
	 permute(v2,v1,1); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,2); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
	 permute(v2,v1,3); 
 | 
			
		||||
	 v1=v1+v2;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#endif
 | 
			
		||||
	 conv.v=v1.v;
 | 
			
		||||
	 return conv.f[0];
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // *=,+=,-= operators
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user