mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Cleaning up simd files
This commit is contained in:
		
							
								
								
									
										4
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								TODO
									
									
									
									
									
								
							@@ -1,8 +1,8 @@
 | 
				
			|||||||
================================================================
 | 
					================================================================
 | 
				
			||||||
*** Hacks and bug fixes to clean up and Audits
 | 
					*** Hacks and bug fixes to clean up and Audits
 | 
				
			||||||
================================================================
 | 
					================================================================
 | 
				
			||||||
* Base class to share common code between vRealF, VComplexF etc... 
 | 
					* Base class to share common code between vRealF, VComplexF etc... done
 | 
				
			||||||
  - Performance check on Guido's reimplementation strategy
 | 
					  - Performance check on Guido's reimplementation strategy  - (GUIDO) tested and no difference was found, merged
 | 
				
			||||||
 | 
					
 | 
				
			||||||
* FIXME audit
 | 
					* FIXME audit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -95,100 +95,10 @@ namespace Grid {
 | 
				
			|||||||
  template<>            inline void zeroit(RealF &arg){ arg=0; };
 | 
					  template<>            inline void zeroit(RealF &arg){ arg=0; };
 | 
				
			||||||
  template<>            inline void zeroit(RealD &arg){ arg=0; };
 | 
					  template<>            inline void zeroit(RealD &arg){ arg=0; };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Eventually delete this part
 | 
					 | 
				
			||||||
#if defined (SSE4)
 | 
					 | 
				
			||||||
    typedef __m128 fvec;
 | 
					 | 
				
			||||||
    typedef __m128d dvec;
 | 
					 | 
				
			||||||
    typedef __m128 cvec;
 | 
					 | 
				
			||||||
    typedef __m128d zvec;
 | 
					 | 
				
			||||||
    typedef __m128i ivec;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#if defined (AVX1) || defined (AVX2)
 | 
					 | 
				
			||||||
    typedef __m256 fvec;
 | 
					 | 
				
			||||||
    typedef __m256d dvec;
 | 
					 | 
				
			||||||
    typedef __m256  cvec;
 | 
					 | 
				
			||||||
    typedef __m256d zvec;
 | 
					 | 
				
			||||||
    typedef __m256i ivec;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#if defined (AVX512)
 | 
					 | 
				
			||||||
    typedef __m512  fvec;
 | 
					 | 
				
			||||||
    typedef __m512d dvec;
 | 
					 | 
				
			||||||
    typedef __m512  cvec;
 | 
					 | 
				
			||||||
    typedef __m512d zvec;
 | 
					 | 
				
			||||||
    typedef __m512i ivec;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#if defined (QPX)
 | 
					 | 
				
			||||||
    typedef float  fvec __attribute__ ((vector_size (16))); // QPX has same SIMD width irrespective of precision
 | 
					 | 
				
			||||||
    typedef float  cvec __attribute__ ((vector_size (16)));
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    typedef vector4double dvec;
 | 
					 | 
				
			||||||
    typedef vector4double zvec;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
#if defined (AVX1) || defined (AVX2) || defined (AVX512)
 | 
					 | 
				
			||||||
    inline void v_prefetch0(int size, const char *ptr){
 | 
					 | 
				
			||||||
          for(int i=0;i<size;i+=64){ //  Define L1 linesize above// What about SSE?
 | 
					 | 
				
			||||||
            _mm_prefetch(ptr+i+4096,_MM_HINT_T1);
 | 
					 | 
				
			||||||
            _mm_prefetch(ptr+i+512,_MM_HINT_T0);
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
#else 
 | 
					 | 
				
			||||||
    inline void v_prefetch0(int size, const char *ptr){};
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
// Permute
 | 
					 | 
				
			||||||
// Permute 0 every ABCDEFGH -> BA DC FE HG
 | 
					 | 
				
			||||||
// Permute 1 every ABCDEFGH -> CD AB GH EF
 | 
					 | 
				
			||||||
// Permute 2 every ABCDEFGH -> EFGH ABCD
 | 
					 | 
				
			||||||
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
 | 
					 | 
				
			||||||
// Permute 4 possible on half precision @512bit vectors.
 | 
					 | 
				
			||||||
//////////////////////////////////////////////////////////
 | 
					 | 
				
			||||||
template<class vsimd>
 | 
					 | 
				
			||||||
inline void Gpermute(vsimd &y,const vsimd &b,int perm){
 | 
					 | 
				
			||||||
	union { 
 | 
					 | 
				
			||||||
	  fvec f;
 | 
					 | 
				
			||||||
	  decltype(vsimd::v) v;
 | 
					 | 
				
			||||||
	} conv;
 | 
					 | 
				
			||||||
	conv.v = b.v;
 | 
					 | 
				
			||||||
      switch (perm){
 | 
					 | 
				
			||||||
#if defined(AVX1)||defined(AVX2)
 | 
					 | 
				
			||||||
      // 8x32 bits=>3 permutes
 | 
					 | 
				
			||||||
      case 2: 
 | 
					 | 
				
			||||||
	conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); 
 | 
					 | 
				
			||||||
	break;
 | 
					 | 
				
			||||||
      case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
 | 
					 | 
				
			||||||
      case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifdef SSE4
 | 
					 | 
				
			||||||
      case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
 | 
					 | 
				
			||||||
      case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifdef AVX512
 | 
					 | 
				
			||||||
	// 16 floats=> permutes
 | 
					 | 
				
			||||||
        // Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo 
 | 
					 | 
				
			||||||
        // Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn 
 | 
					 | 
				
			||||||
        // Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
 | 
					 | 
				
			||||||
        // Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
 | 
					 | 
				
			||||||
      case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
 | 
					 | 
				
			||||||
      case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
 | 
					 | 
				
			||||||
      case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
 | 
					 | 
				
			||||||
      case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
#ifdef QPX
 | 
					 | 
				
			||||||
#error not implemented
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
      default: assert(0); break;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
      y.v=conv.v;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <simd/Grid_vector_types.h>
 | 
					#include <simd/Grid_vector_types.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // NB: Template the following on "type Complex" and then implement *,+,- for 
 | 
					  // NB: Template the following on "type Complex" and then implement *,+,- for 
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,7 +2,7 @@
 | 
				
			|||||||
/*! @file Grid_vector_types.h
 | 
					/*! @file Grid_vector_types.h
 | 
				
			||||||
  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
					  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
// Time-stamp: <2015-05-26 12:05:39 neo>
 | 
					// Time-stamp: <2015-05-26 13:22:36 neo>
 | 
				
			||||||
//---------------------------------------------------------------------------
 | 
					//---------------------------------------------------------------------------
 | 
				
			||||||
#ifndef GRID_VECTOR_TYPES
 | 
					#ifndef GRID_VECTOR_TYPES
 | 
				
			||||||
#define GRID_VECTOR_TYPES
 | 
					#define GRID_VECTOR_TYPES
 | 
				
			||||||
@@ -16,7 +16,9 @@
 | 
				
			|||||||
#if defined AVX512
 | 
					#if defined AVX512
 | 
				
			||||||
#include "Grid_knc.h"
 | 
					#include "Grid_knc.h"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					#if defined QPX
 | 
				
			||||||
 | 
					#include "Grid_qpx.h"
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -33,8 +35,6 @@ namespace Grid {
 | 
				
			|||||||
  template <typename Condition, typename ReturnType> using EnableIf   =    Invoke<std::enable_if<Condition::value, ReturnType>>;
 | 
					  template <typename Condition, typename ReturnType> using EnableIf   =    Invoke<std::enable_if<Condition::value, ReturnType>>;
 | 
				
			||||||
  template <typename Condition, typename ReturnType> using NotEnableIf=    Invoke<std::enable_if<!Condition::value, ReturnType>>;
 | 
					  template <typename Condition, typename ReturnType> using NotEnableIf=    Invoke<std::enable_if<!Condition::value, ReturnType>>;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////
 | 
				
			||||||
  // Check for complexity with type traits
 | 
					  // Check for complexity with type traits
 | 
				
			||||||
  template <typename T>     struct is_complex : std::false_type {};
 | 
					  template <typename T>     struct is_complex : std::false_type {};
 | 
				
			||||||
@@ -57,6 +57,58 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  ///////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Permute
 | 
				
			||||||
 | 
					// Permute 0 every ABCDEFGH -> BA DC FE HG
 | 
				
			||||||
 | 
					// Permute 1 every ABCDEFGH -> CD AB GH EF
 | 
				
			||||||
 | 
					// Permute 2 every ABCDEFGH -> EFGH ABCD
 | 
				
			||||||
 | 
					// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
 | 
				
			||||||
 | 
					// Permute 4 possible on half precision @512bit vectors.
 | 
				
			||||||
 | 
					//////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template<class vsimd>
 | 
				
			||||||
 | 
					inline void Gpermute(vsimd &y,const vsimd &b,int perm){
 | 
				
			||||||
 | 
						union { 
 | 
				
			||||||
 | 
						  SIMD_Ftype f;
 | 
				
			||||||
 | 
						  decltype(vsimd::v) v;
 | 
				
			||||||
 | 
						} conv;
 | 
				
			||||||
 | 
						conv.v = b.v;
 | 
				
			||||||
 | 
					      switch (perm){
 | 
				
			||||||
 | 
					#if defined(AVX1)||defined(AVX2)
 | 
				
			||||||
 | 
					      // 8x32 bits=>3 permutes
 | 
				
			||||||
 | 
					      case 2: 
 | 
				
			||||||
 | 
						conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); 
 | 
				
			||||||
 | 
						break;
 | 
				
			||||||
 | 
					      case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
 | 
				
			||||||
 | 
					      case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef SSE4
 | 
				
			||||||
 | 
					      case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
 | 
				
			||||||
 | 
					      case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef AVX512
 | 
				
			||||||
 | 
						// 16 floats=> permutes
 | 
				
			||||||
 | 
					        // Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo 
 | 
				
			||||||
 | 
					        // Permute 1 every abcd efgh ijkl mnop -> cdab ghef klij opmn 
 | 
				
			||||||
 | 
					        // Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
 | 
				
			||||||
 | 
					        // Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
 | 
				
			||||||
 | 
					      case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
 | 
				
			||||||
 | 
					      case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
 | 
				
			||||||
 | 
					      case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
 | 
				
			||||||
 | 
					      case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef QPX
 | 
				
			||||||
 | 
					#error not implemented
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					      default: assert(0); break;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      y.v=conv.v;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					///////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /*
 | 
					  /*
 | 
				
			||||||
    @brief Grid_simd class for the SIMD vector type operations
 | 
					    @brief Grid_simd class for the SIMD vector type operations
 | 
				
			||||||
   */
 | 
					   */
 | 
				
			||||||
@@ -380,6 +432,12 @@ namespace Grid {
 | 
				
			|||||||
  typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
 | 
					  typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
 | 
				
			||||||
  typedef Grid_simd< Integer               , SIMD_Itype > vInteger;
 | 
					  typedef Grid_simd< Integer               , SIMD_Itype > vInteger;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user