diff --git a/lib/Grid_simd.h b/lib/Grid_simd.h index 504e1c17..cccc82e0 100644 --- a/lib/Grid_simd.h +++ b/lib/Grid_simd.h @@ -94,139 +94,7 @@ namespace Grid { template<> inline void zeroit(ComplexD &arg){ arg=0; }; template<> inline void zeroit(RealF &arg){ arg=0; }; template<> inline void zeroit(RealD &arg){ arg=0; }; - -#if defined (SSE4) - typedef __m128 fvec; - typedef __m128d dvec; - typedef __m128 cvec; - typedef __m128d zvec; - typedef __m128i ivec; -#endif -#if defined (AVX1) || defined (AVX2) - typedef __m256 fvec; - typedef __m256d dvec; - typedef __m256 cvec; - typedef __m256d zvec; - typedef __m256i ivec; -#endif -#if defined (AVX512) - typedef __m512 fvec; - typedef __m512d dvec; - typedef __m512 cvec; - typedef __m512d zvec; - typedef __m512i ivec; -#endif -#if defined (QPX) - typedef float fvec __attribute__ ((vector_size (16))); // QPX has same SIMD width irrespective of precision - typedef float cvec __attribute__ ((vector_size (16))); - - typedef vector4double dvec; - typedef vector4double zvec; -#endif -#if defined (AVX1) || defined (AVX2) || defined (AVX512) - inline void v_prefetch0(int size, const char *ptr){ - for(int i=0;i BA DC FE HG -// Permute 1 every ABCDEFGH -> CD AB GH EF -// Permute 2 every ABCDEFGH -> EFGH ABCD -// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single) -// Permute 4 possible on half precision @512bit vectors. -////////////////////////////////////////////////////////// -template -inline void Gpermute0(vsimd &y,const vsimd &b) { - union { - fvec f; - decltype(vsimd::v) v; - } conv; - conv.v = b.v; -#ifdef SSE4 - conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); -#endif -#if defined(AVX1)||defined(AVX2) - conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); -#endif -#ifdef AVX512 - conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); -#endif - y.v=conv.v; -}; -template -inline void Gpermute1(vsimd &y,const vsimd &b) { - union { - fvec f; - decltype(vsimd::v) v; - } conv; - conv.v = b.v; -#ifdef SSE4 - conv.f = _mm_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); -#endif -#if defined(AVX1)||defined(AVX2) - conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); -#endif -#ifdef AVX512 - conv.f = _mm512_permute4f128_ps(conv.f,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); -#endif - y.v=conv.v; -}; -template -inline void Gpermute2(vsimd &y,const vsimd &b) { - union { - fvec f; - decltype(vsimd::v) v; - } conv; - conv.v = b.v; -#ifdef SSE4 -#endif -#if defined(AVX1)||defined(AVX2) - conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); -#endif -#ifdef AVX512 - conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_BADC); -#endif - y.v=conv.v; - -}; -template -inline void Gpermute3(vsimd &y,const vsimd &b) { - union { - fvec f; - decltype(vsimd::v) v; - } conv; - conv.v = b.v; -#ifdef AVX512 - conv.f = _mm512_swizzle_ps(conv.f,_MM_SWIZ_REG_CDAB); -#endif - y.v=conv.v; - -}; - -template -inline void Gpermute(vsimd &y,const vsimd &b,int perm){ - union { - fvec f; - decltype(vsimd::v) v; - } conv; - conv.v = b.v; - switch (perm){ - case 3: Gpermute3(y,b); break; - case 2: Gpermute2(y,b); break; - case 1: Gpermute1(y,b); break; - case 0: Gpermute0(y,b); break; - default: assert(0); break; - } - }; - + }; #include