mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Included Gpermute in the new Grid_simd.h file style.
Now tested for SSE4. OK
This commit is contained in:
		@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-22 18:58:27 neo>
 | 
			
		||||
// Time-stamp: <2015-05-27 12:07:15 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
@@ -383,6 +383,30 @@ namespace Grid {
 | 
			
		||||
  typedef __m256d SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef __m256i SIMD_Itype; // Integer type
 | 
			
		||||
 | 
			
		||||
  // prefetching
 | 
			
		||||
  // Issue software prefetch hints ahead of a buffer that is about to be read.
  //
  // size : number of bytes to walk; a value <= 0 issues no hints.
  // ptr  : base address of the buffer.
  //
  // The buffer is walked in 64-byte steps (the original note asks for the
  // L1 line size to be defined as a constant instead of this literal). Each
  // step requests two addresses: one 4096 bytes ahead with the T1 hint and
  // one 512 bytes ahead with the T0 hint.
  inline void v_prefetch0(int size, const char *ptr){
    int offset = 0;
    while (offset < size) {
      const char *line = ptr + offset;
      _mm_prefetch(line + 4096, _MM_HINT_T1);
      _mm_prefetch(line + 512, _MM_HINT_T0);
      offset += 64;
    }
  }
 | 
			
		||||
  
 | 
			
		||||
  // Permute the lanes of a 256-bit vector, viewed as eight 32-bit floats.
  //
  // y    : destination; only its member `v` is written.
  // b    : source; only its member `v` is read.
  // perm : permutation selector
  //          0 -> swap the two 128-bit halves
  //          1 -> swap the 64-bit pairs inside each 128-bit half
  //          2 -> swap adjacent 32-bit elements
  //          3 -> no shuffle is applied on AVX1/2; `b.v` is copied unchanged
  //        any other value trips assert(0).
  //
  // The register is reinterpreted as __m256 through a union so that the
  // same float shuffles can be used whatever the type of `VectorSIMD::v` is.
  template < typename VectorSIMD > 
    inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
    union {
      __m256 f;
      decltype(VectorSIMD::v) v;
    } pun;
    pun.v = b.v;

    if (perm == 0) {
      pun.f = _mm256_permute2f128_ps(pun.f, pun.f, 0x01);
    } else if (perm == 1) {
      pun.f = _mm256_shuffle_ps(pun.f, pun.f, _MM_SHUFFLE(1,0,3,2));
    } else if (perm == 2) {
      pun.f = _mm256_shuffle_ps(pun.f, pun.f, _MM_SHUFFLE(2,3,0,1));
    } else if (perm != 3) {
      assert(0);
    }

    y.v = pun.v;
  }
 | 
			
		||||
 | 
			
		||||
  // Function name aliases
 | 
			
		||||
  typedef Optimization::Vsplat   VsplatSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-22 17:12:44 neo>
 | 
			
		||||
// Time-stamp: <2015-05-27 12:08:50 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
@@ -302,7 +302,32 @@ namespace Grid {
 | 
			
		||||
  typedef __m512d SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef __m512i SIMD_Itype; // Integer type
 | 
			
		||||
 | 
			
		||||
  // prefetch
 | 
			
		||||
  // Issue software prefetch hints ahead of a buffer that is about to be read.
  //
  // size : number of bytes to walk; a value <= 0 issues no hints.
  // ptr  : base address of the buffer.
  //
  // The buffer is walked in 64-byte steps (the original note asks for the
  // L1 line size to be defined as a constant instead of this literal). Each
  // step requests two addresses: one 4096 bytes ahead with the T1 hint and
  // one 512 bytes ahead with the T0 hint.
  inline void v_prefetch0(int size, const char *ptr){
    int offset = 0;
    while (offset < size) {
      const char *line = ptr + offset;
      _mm_prefetch(line + 4096, _MM_HINT_T1);
      _mm_prefetch(line + 512, _MM_HINT_T0);
      offset += 64;
    }
  }
 | 
			
		||||
 | 
			
		||||
  // Gpermute utilities consider coalescing into 1 Gpermute
 | 
			
		||||
  template < typename VectorSIMD > 
 | 
			
		||||
    inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
 | 
			
		||||
    union { 
 | 
			
		||||
      __m512 f;
 | 
			
		||||
      decltype(VectorSIMD::v) v;
 | 
			
		||||
    } conv;
 | 
			
		||||
    conv.v = b.v;
 | 
			
		||||
    switch(perm){
 | 
			
		||||
    case 3:  conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
 | 
			
		||||
    case 2:  conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break; 
 | 
			
		||||
    case 1 : conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
 | 
			
		||||
    case 0 : conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
 | 
			
		||||
    default: assert(0); break;
 | 
			
		||||
    }
 | 
			
		||||
    y=conv.v;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // Function name aliases
 | 
			
		||||
  typedef Optimization::Vsplat   VsplatSIMD;
 | 
			
		||||
  typedef Optimization::Vstore   VstoreSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-22 17:29:26 neo>
 | 
			
		||||
// Time-stamp: <2015-05-27 11:30:21 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// lot of undefined functions
 | 
			
		||||
@@ -251,6 +251,7 @@ namespace Grid {
 | 
			
		||||
  typedef vector4double SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef int SIMD_Itype;           // Integer type
 | 
			
		||||
 | 
			
		||||
  // Prefetch hook for this backend: intentionally does nothing.
  // Both arguments are accepted and ignored.
  inline void v_prefetch0(int size, const char *ptr){
    (void)size;
    (void)ptr;
  }
 | 
			
		||||
 | 
			
		||||
  // Function name aliases
 | 
			
		||||
  typedef Optimization::Vsplat   VsplatSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-21 18:06:30 neo>
 | 
			
		||||
// Time-stamp: <2015-05-27 12:02:07 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <pmmintrin.h>
 | 
			
		||||
@@ -221,7 +221,7 @@ namespace Optimization {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
@@ -277,6 +277,10 @@ namespace Optimization {
 | 
			
		||||
    assert(0);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -289,6 +293,28 @@ namespace Grid {
 | 
			
		||||
  typedef __m128i SIMD_Itype; // Integer type
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Prefetch utility for this backend: intentionally does nothing.
  // Both arguments are accepted and ignored.
  inline void v_prefetch0(int size, const char *ptr){
    (void)size;
    (void)ptr;
  }
 | 
			
		||||
 | 
			
		||||
  // Gpermute function
 | 
			
		||||
  // Permute the lanes of a 128-bit vector, viewed as four 32-bit floats.
  //
  // y    : destination; only its member `v` is written.
  // b    : source; only its member `v` is read.
  // perm : permutation selector
  //          0 -> swap the two 64-bit halves      (a0 a1 a2 a3 -> a2 a3 a0 a1)
  //          1 -> swap adjacent 32-bit elements   (a0 a1 a2 a3 -> a1 a0 a3 a2)
  //          2, 3 -> no shuffle is applied on SSE4; `b.v` is copied unchanged
  //        any other value trips assert(0).
  //
  // The register is reinterpreted as __m128 through a union so that the
  // same float shuffles can be used whatever the type of `VectorSIMD::v` is.
  template < typename VectorSIMD > 
    inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
    union {
      __m128 f;
      decltype(VectorSIMD::v) v;
    } pun;
    pun.v = b.v;

    if (perm == 0) {
      pun.f = _mm_shuffle_ps(pun.f, pun.f, _MM_SHUFFLE(1,0,3,2));
    } else if (perm == 1) {
      pun.f = _mm_shuffle_ps(pun.f, pun.f, _MM_SHUFFLE(2,3,0,1));
    } else if (perm != 2 && perm != 3) {
      assert(0);
    }

    y.v = pun.v;
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Function name aliases
 | 
			
		||||
  typedef Optimization::Vsplat   VsplatSIMD;
 | 
			
		||||
  typedef Optimization::Vstore   VstoreSIMD;
 | 
			
		||||
@@ -296,6 +322,8 @@ namespace Grid {
 | 
			
		||||
  typedef Optimization::Vstream  VstreamSIMD;
 | 
			
		||||
  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
/*! @file Grid_vector_types.h
 | 
			
		||||
  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-26 14:08:13 neo>
 | 
			
		||||
// Time-stamp: <2015-05-27 12:04:06 neo>
 | 
			
		||||
//---------------------------------------------------------------------------
 | 
			
		||||
#ifndef GRID_VECTOR_TYPES
 | 
			
		||||
#define GRID_VECTOR_TYPES
 | 
			
		||||
@@ -58,56 +58,7 @@ namespace Grid {
 | 
			
		||||
  ///////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Move to the simd files
 | 
			
		||||
//////////////////////////////////////////////////////////
 | 
			
		||||
// Permute
 | 
			
		||||
// Permute 0 every ABCDEFGH -> BA DC FE HG
 | 
			
		||||
// Permute 1 every ABCDEFGH -> CD AB GH EF
 | 
			
		||||
// Permute 2 every ABCDEFGH -> EFGH ABCD
 | 
			
		||||
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
 | 
			
		||||
// Permute 4 possible on half precision @512bit vectors.
 | 
			
		||||
//////////////////////////////////////////////////////////
 | 
			
		||||
template<class vsimd>
 | 
			
		||||
inline void Gpermute(vsimd &y,const vsimd &b,int perm){
 | 
			
		||||
	union { 
 | 
			
		||||
	  SIMD_Ftype f;
 | 
			
		||||
	  decltype(vsimd::v) v;
 | 
			
		||||
	} conv;
 | 
			
		||||
	conv.v = b.v;
 | 
			
		||||
      switch (perm){
 | 
			
		||||
#if defined(AVX1)||defined(AVX2)
 | 
			
		||||
      // 8x32 bits=>3 permutes
 | 
			
		||||
      case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
 | 
			
		||||
      case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
 | 
			
		||||
      case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
      case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
 | 
			
		||||
      case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef AVX512
 | 
			
		||||
	// 16 floats=> permutes
 | 
			
		||||
        // Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo 
 | 
			
		||||
        // Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn 
 | 
			
		||||
        // Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
 | 
			
		||||
        // Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
 | 
			
		||||
      case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
 | 
			
		||||
      case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
 | 
			
		||||
      case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
 | 
			
		||||
      case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef QPX
 | 
			
		||||
#error not implemented
 | 
			
		||||
#endif
 | 
			
		||||
      default: assert(0); break;
 | 
			
		||||
      }
 | 
			
		||||
      y.v=conv.v;
 | 
			
		||||
 | 
			
		||||
 };
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
    @brief Grid_simd class for the SIMD vector type operations
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user