mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
ICPC and GCC5 fixes
This commit is contained in:
parent
40192841a4
commit
cf27f22dc0
@ -10,17 +10,29 @@
|
|||||||
//
|
//
|
||||||
// Vector types are arch dependent
|
// Vector types are arch dependent
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
typedef uint32_t Integer;
|
||||||
|
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
#if defined(AVX1) || defined (AVX2)
|
#if defined(AVX1) || defined (AVX2)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
// _mm256_set_m128i(hi,lo); // not defined in all versions of immintrin.h
|
||||||
|
#ifndef _mm256_set_m128i
|
||||||
|
#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include <zmmintrin.h>
|
#ifndef KNC_ONLY_STORES
|
||||||
|
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
|
||||||
|
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
@ -148,16 +160,23 @@ namespace Grid {
|
|||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
template<class vsimd>
|
template<class vsimd>
|
||||||
inline void Gpermute(vsimd &y,const vsimd &b,int perm){
|
inline void Gpermute(vsimd &y,const vsimd &b,int perm){
|
||||||
|
union {
|
||||||
|
fvec f;
|
||||||
|
decltype(vsimd::v) v;
|
||||||
|
} conv;
|
||||||
|
conv.v = b.v;
|
||||||
switch (perm){
|
switch (perm){
|
||||||
#if defined(AVX1)||defined(AVX2)
|
#if defined(AVX1)||defined(AVX2)
|
||||||
// 8x32 bits=>3 permutes
|
// 8x32 bits=>3 permutes
|
||||||
case 2: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break;
|
case 2:
|
||||||
case 1: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break;
|
conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
|
||||||
case 0: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break;
|
break;
|
||||||
|
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
||||||
|
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
||||||
#endif
|
#endif
|
||||||
#ifdef SSE4
|
#ifdef SSE4
|
||||||
case 1: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break;
|
case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
||||||
case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break;
|
case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
|
||||||
#endif
|
#endif
|
||||||
#ifdef AVX512
|
#ifdef AVX512
|
||||||
// 16 floats=> permutes
|
// 16 floats=> permutes
|
||||||
@ -165,16 +184,18 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
|
|||||||
// Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn
|
// Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn
|
||||||
// Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
|
// Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
|
||||||
// Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
|
// Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
|
||||||
case 3: y.v =(decltype(y.v)) _mm512_swizzle_ps((__m512)b.v,_MM_SWIZ_REG_CDAB); break;
|
case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
|
||||||
case 2: y.v =(decltype(y.v)) _mm512_swizzle_ps((__m512)b.v,_MM_SWIZ_REG_BADC); break;
|
case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
|
||||||
case 1: y.v =(decltype(y.v)) _mm512_permute4f128_ps((__m512)b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
|
case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
|
||||||
case 0: y.v =(decltype(y.v)) _mm512_permute4f128_ps((__m512)b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
|
case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
|
||||||
#endif
|
#endif
|
||||||
#ifdef QPX
|
#ifdef QPX
|
||||||
#error not implemented
|
#error not implemented
|
||||||
#endif
|
#endif
|
||||||
default: assert(0); break;
|
default: assert(0); break;
|
||||||
}
|
}
|
||||||
|
y.v=conv.v;
|
||||||
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user