1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 17:25:37 +01:00

ICPC and GCC5 fixes

This commit is contained in:
Peter Boyle 2015-05-15 11:35:02 +01:00
parent 40192841a4
commit cf27f22dc0

View File

@ -11,16 +11,28 @@
// Vector types are arch dependent // Vector types are arch dependent
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
typedef uint32_t Integer;
#ifdef SSE4 #ifdef SSE4
#include <pmmintrin.h> #include <pmmintrin.h>
#endif #endif
#if defined(AVX1) || defined (AVX2) #if defined(AVX1) || defined (AVX2)
#include <immintrin.h> #include <immintrin.h>
// _mm256_set_m128i(hi,lo); // not defined in all versions of immintrin.h
#ifndef _mm256_set_m128i
#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1)
#endif #endif
#endif
#ifdef AVX512 #ifdef AVX512
#include <immintrin.h> #include <immintrin.h>
#include <zmmintrin.h> #ifndef KNC_ONLY_STORES
#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512
#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512
#endif
#endif #endif
namespace Grid { namespace Grid {
@ -148,16 +160,23 @@ namespace Grid {
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
template<class vsimd> template<class vsimd>
inline void Gpermute(vsimd &y,const vsimd &b,int perm){ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
union {
fvec f;
decltype(vsimd::v) v;
} conv;
conv.v = b.v;
switch (perm){ switch (perm){
#if defined(AVX1)||defined(AVX2) #if defined(AVX1)||defined(AVX2)
// 8x32 bits=>3 permutes // 8x32 bits=>3 permutes
case 2: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break; case 2:
case 1: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break; conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
case 0: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break; break;
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
#endif #endif
#ifdef SSE4 #ifdef SSE4
case 1: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break; case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break; case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break;
#endif #endif
#ifdef AVX512 #ifdef AVX512
// 16 floats=> permutes // 16 floats=> permutes
@ -165,16 +184,18 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
// Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn // Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn
// Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl // Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
// Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh // Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
case 3: y.v =(decltype(y.v)) _mm512_swizzle_ps((__m512)b.v,_MM_SWIZ_REG_CDAB); break; case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break;
case 2: y.v =(decltype(y.v)) _mm512_swizzle_ps((__m512)b.v,_MM_SWIZ_REG_BADC); break; case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break;
case 1: y.v =(decltype(y.v)) _mm512_permute4f128_ps((__m512)b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break; case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
case 0: y.v =(decltype(y.v)) _mm512_permute4f128_ps((__m512)b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break; case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
#endif #endif
#ifdef QPX #ifdef QPX
#error not implemented #error not implemented
#endif #endif
default: assert(0); break; default: assert(0); break;
} }
y.v=conv.v;
}; };
}; };