mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Cleaning up simd files
This commit is contained in:
parent
257aa92421
commit
3f576830f9
4
TODO
4
TODO
@ -1,8 +1,8 @@
|
|||||||
================================================================
|
================================================================
|
||||||
*** Hacks and bug fixes to clean up and Audits
|
*** Hacks and bug fixes to clean up and Audits
|
||||||
================================================================
|
================================================================
|
||||||
* Base class to share common code between vRealF, vComplexF etc...
|
* Base class to share common code between vRealF, vComplexF etc... done
|
||||||
- Performance check on Guido's reimplementation strategy
|
- Performance check on Guido's reimplementation strategy - (GUIDO) tested and no difference was found, merged
|
||||||
|
|
||||||
* FIXME audit
|
* FIXME audit
|
||||||
|
|
||||||
|
@ -95,100 +95,10 @@ namespace Grid {
|
|||||||
template<> inline void zeroit(RealF &arg){ arg=0; };
|
template<> inline void zeroit(RealF &arg){ arg=0; };
|
||||||
template<> inline void zeroit(RealD &arg){ arg=0; };
|
template<> inline void zeroit(RealD &arg){ arg=0; };
|
||||||
|
|
||||||
// Eventually delete this part
|
|
||||||
#if defined (SSE4)
|
|
||||||
typedef __m128 fvec;
|
|
||||||
typedef __m128d dvec;
|
|
||||||
typedef __m128 cvec;
|
|
||||||
typedef __m128d zvec;
|
|
||||||
typedef __m128i ivec;
|
|
||||||
#endif
|
|
||||||
#if defined (AVX1) || defined (AVX2)
|
|
||||||
typedef __m256 fvec;
|
|
||||||
typedef __m256d dvec;
|
|
||||||
typedef __m256 cvec;
|
|
||||||
typedef __m256d zvec;
|
|
||||||
typedef __m256i ivec;
|
|
||||||
#endif
|
|
||||||
#if defined (AVX512)
|
|
||||||
typedef __m512 fvec;
|
|
||||||
typedef __m512d dvec;
|
|
||||||
typedef __m512 cvec;
|
|
||||||
typedef __m512d zvec;
|
|
||||||
typedef __m512i ivec;
|
|
||||||
#endif
|
|
||||||
#if defined (QPX)
|
|
||||||
typedef float fvec __attribute__ ((vector_size (16))); // QPX has same SIMD width irrespective of precision
|
|
||||||
typedef float cvec __attribute__ ((vector_size (16)));
|
|
||||||
|
|
||||||
typedef vector4double dvec;
|
|
||||||
typedef vector4double zvec;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined (AVX1) || defined (AVX2) || defined (AVX512)
|
|
||||||
inline void v_prefetch0(int size, const char *ptr){
|
|
||||||
for(int i=0;i<size;i+=64){ // Define L1 linesize above// What about SSE?
|
|
||||||
_mm_prefetch(ptr+i+4096,_MM_HINT_T1);
|
|
||||||
_mm_prefetch(ptr+i+512,_MM_HINT_T0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
inline void v_prefetch0(int size, const char *ptr){};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////
|
|
||||||
// Permute
|
|
||||||
// Permute 0 every ABCDEFGH -> BA DC FE HG
|
|
||||||
// Permute 1 every ABCDEFGH -> CD AB GH EF
|
|
||||||
// Permute 2 every ABCDEFGH -> EFGH ABCD
|
|
||||||
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
|
|
||||||
// Permute 4 possible on half precision @512bit vectors.
|
|
||||||
//////////////////////////////////////////////////////////
|
|
||||||
template<class vsimd>
|
|
||||||
inline void Gpermute(vsimd &y,const vsimd &b,int perm){
|
|
||||||
union {
|
|
||||||
fvec f;
|
|
||||||
decltype(vsimd::v) v;
|
|
||||||
} conv;
|
|
||||||
conv.v = b.v;
|
|
||||||
switch (perm){
|
|
||||||
#if defined(AVX1)||defined(AVX2)
|
|
||||||
// 8x32 bits=>3 permutes
|
|
||||||
case 2:
|
|
||||||
conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
|
|
||||||
break;
|
|
||||||
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
|
||||||
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
|
||||||
#endif
|
|
||||||
#ifdef SSE4
|
|
||||||
case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
|
||||||
case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
|
|
||||||
#endif
|
|
||||||
#ifdef AVX512
|
|
||||||
// 16 floats=> permutes
|
|
||||||
// Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo
|
|
||||||
// Permute 1 every abcd efgh ijkl mnop -> cdab ghef klij opmn
|
|
||||||
// Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
|
|
||||||
// Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
|
|
||||||
case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
|
|
||||||
case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
|
|
||||||
case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
|
|
||||||
case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
|
|
||||||
#endif
|
|
||||||
#ifdef QPX
|
|
||||||
#error not implemented
|
|
||||||
#endif
|
|
||||||
default: assert(0); break;
|
|
||||||
}
|
|
||||||
y.v=conv.v;
|
|
||||||
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include <simd/Grid_vector_types.h>
|
#include <simd/Grid_vector_types.h>
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
// NB: Template the following on "type Complex" and then implement *,+,- for
|
// NB: Template the following on "type Complex" and then implement *,+,- for
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
/*! @file Grid_vector_types.h
|
/*! @file Grid_vector_types.h
|
||||||
@brief Defines templated class Grid_simd to deal with inner vector types
|
@brief Defines templated class Grid_simd to deal with inner vector types
|
||||||
*/
|
*/
|
||||||
// Time-stamp: <2015-05-26 12:05:39 neo>
|
// Time-stamp: <2015-05-26 13:22:36 neo>
|
||||||
//---------------------------------------------------------------------------
|
//---------------------------------------------------------------------------
|
||||||
#ifndef GRID_VECTOR_TYPES
|
#ifndef GRID_VECTOR_TYPES
|
||||||
#define GRID_VECTOR_TYPES
|
#define GRID_VECTOR_TYPES
|
||||||
@ -16,7 +16,9 @@
|
|||||||
#if defined AVX512
|
#if defined AVX512
|
||||||
#include "Grid_knc.h"
|
#include "Grid_knc.h"
|
||||||
#endif
|
#endif
|
||||||
|
#if defined QPX
|
||||||
|
#include "Grid_qpx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -33,8 +35,6 @@ namespace Grid {
|
|||||||
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType>>;
|
template <typename Condition, typename ReturnType> using EnableIf = Invoke<std::enable_if<Condition::value, ReturnType>>;
|
||||||
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType>>;
|
template <typename Condition, typename ReturnType> using NotEnableIf= Invoke<std::enable_if<!Condition::value, ReturnType>>;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Check for complexity with type traits
|
// Check for complexity with type traits
|
||||||
template <typename T> struct is_complex : std::false_type {};
|
template <typename T> struct is_complex : std::false_type {};
|
||||||
@ -57,6 +57,58 @@ namespace Grid {
|
|||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
// Permute
|
||||||
|
// Permute 0 every ABCDEFGH -> BA DC FE HG
|
||||||
|
// Permute 1 every ABCDEFGH -> CD AB GH EF
|
||||||
|
// Permute 2 every ABCDEFGH -> EFGH ABCD
|
||||||
|
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
|
||||||
|
// Permute 4 possible on half precision @512bit vectors.
|
||||||
|
//////////////////////////////////////////////////////////
|
||||||
|
template<class vsimd>
|
||||||
|
inline void Gpermute(vsimd &y,const vsimd &b,int perm){
|
||||||
|
union {
|
||||||
|
SIMD_Ftype f;
|
||||||
|
decltype(vsimd::v) v;
|
||||||
|
} conv;
|
||||||
|
conv.v = b.v;
|
||||||
|
switch (perm){
|
||||||
|
#if defined(AVX1)||defined(AVX2)
|
||||||
|
// 8x32 bits=>3 permutes
|
||||||
|
case 2:
|
||||||
|
conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
|
||||||
|
break;
|
||||||
|
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
||||||
|
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
||||||
|
#endif
|
||||||
|
#ifdef SSE4
|
||||||
|
case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
||||||
|
case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
|
||||||
|
#endif
|
||||||
|
#ifdef AVX512
|
||||||
|
// 16 floats=> permutes
|
||||||
|
// Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo
|
||||||
|
// Permute 1 every abcd efgh ijkl mnop -> cdab ghef klij opmn
|
||||||
|
// Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
|
||||||
|
// Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
|
||||||
|
case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
|
||||||
|
case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
|
||||||
|
case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
|
||||||
|
case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
|
||||||
|
#endif
|
||||||
|
#ifdef QPX
|
||||||
|
#error not implemented
|
||||||
|
#endif
|
||||||
|
default: assert(0); break;
|
||||||
|
}
|
||||||
|
y.v=conv.v;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@brief Grid_simd class for the SIMD vector type operations
|
@brief Grid_simd class for the SIMD vector type operations
|
||||||
*/
|
*/
|
||||||
@ -380,6 +432,12 @@ namespace Grid {
|
|||||||
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
|
typedef Grid_simd< std::complex< double >, SIMD_Dtype > vComplexD;
|
||||||
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
|
typedef Grid_simd< Integer , SIMD_Itype > vInteger;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user