From cf27f22dc05c5e59196e50a524142f3f8f18f14a Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 15 May 2015 11:35:02 +0100 Subject: [PATCH] ICPC and GCC5 fixes --- lib/Grid_simd.h | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/lib/Grid_simd.h b/lib/Grid_simd.h index 7ad7367b..af77591c 100644 --- a/lib/Grid_simd.h +++ b/lib/Grid_simd.h @@ -10,17 +10,29 @@ // // Vector types are arch dependent //////////////////////////////////////////////////////////////////////// - + +typedef uint32_t Integer; #ifdef SSE4 #include #endif #if defined(AVX1) || defined (AVX2) #include + +// _mm256_set_m128i(hi,lo); // not defined in all versions of immintrin.h +#ifndef _mm256_set_m128i +#define _mm256_set_m128i(hi,lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo),(hi),1) #endif + +#endif + #ifdef AVX512 #include -#include +#ifndef KNC_ONLY_STORES +#define _mm512_storenrngo_ps _mm512_store_ps // not present in AVX512 +#define _mm512_storenrngo_pd _mm512_store_pd // not present in AVX512 +#endif + #endif namespace Grid { @@ -148,16 +160,23 @@ namespace Grid { ////////////////////////////////////////////////////////// template inline void Gpermute(vsimd &y,const vsimd &b,int perm){ + union { + fvec f; + decltype(vsimd::v) v; + } conv; + conv.v = b.v; switch (perm){ #if defined(AVX1)||defined(AVX2) // 8x32 bits=>3 permutes - case 2: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break; - case 1: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break; - case 0: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break; + case 2: + conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); + break; + case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break; + case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break; #endif #ifdef SSE4 - case 1: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break; - case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break; + case 1: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break; + case 0: conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2));break; #endif #ifdef AVX512 // 16 floats=> permutes @@ -165,16 +184,18 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){ // Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn // Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl // Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh - case 3: y.v =(decltype(y.v)) _mm512_swizzle_ps((__m512)b.v,_MM_SWIZ_REG_CDAB); break; - case 2: y.v =(decltype(y.v)) _mm512_swizzle_ps((__m512)b.v,_MM_SWIZ_REG_BADC); break; - case 1: y.v =(decltype(y.v)) _mm512_permute4f128_ps((__m512)b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break; - case 0: y.v =(decltype(y.v)) _mm512_permute4f128_ps((__m512)b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break; + case 3: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); break; + case 2: conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); break; + case 1: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break; + case 0: conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break; #endif #ifdef QPX #error not implemented #endif default: assert(0); break; } + y.v=conv.v; + }; };