1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 14:57:05 +01:00

Included Gpermute in the new Grid_simd.h file style.

Now tested for SSE4. OK
This commit is contained in:
neo
2015-05-27 12:11:44 +09:00
parent 3cb34af82c
commit 64753ea633
6 changed files with 99 additions and 56 deletions

View File

@ -4,7 +4,7 @@
Using intrinsics
*/
// Time-stamp: <2015-05-22 18:58:27 neo>
// Time-stamp: <2015-05-27 12:07:15 neo>
//----------------------------------------------------------------------
#include <immintrin.h>
@ -383,6 +383,30 @@ namespace Grid {
typedef __m256d SIMD_Dtype; // Double precision type
typedef __m256i SIMD_Itype; // Integer type
// Prefetching
// Issue software prefetch hints for the memory that lies ahead of a buffer.
//
// size : number of bytes to walk over, in steps of 64; no hint is issued
//        when size <= 0.
// ptr  : start of the buffer being walked.
//
// For each step two hints are emitted, both for addresses *ahead* of the
// current position: one 4096 bytes ahead with _MM_HINT_T1 and one 512 bytes
// ahead with _MM_HINT_T0. Nothing is read or written through ptr.
inline void v_prefetch0(int size, const char *ptr){
  int offset = 0;
  while (offset < size) {
    const char *here = ptr + offset;
    _mm_prefetch(here + 4096, _MM_HINT_T1); // far look-ahead
    _mm_prefetch(here + 512, _MM_HINT_T0);  // near look-ahead
    // 64-byte step: presumably the L1 cache-line size. The original note
    // asks for this to be defined as a named constant above.
    offset += 64;
  }
}
// Permute the contents of a 256-bit SIMD wrapper.
//
// y    : destination; its member `v` receives the permuted register.
// b    : source; only its member `v` is read.
// perm : selects the permutation
//          0 - swap the two 128-bit halves of the register
//          1 - swap the two 64-bit halves inside each 128-bit lane
//          2 - swap adjacent 32-bit elements inside each 128-bit lane
//          3 - no permutation (y.v becomes a copy of b.v)
//        any other value trips assert(0).
//
// The register is reinterpreted as packed floats through a union so that the
// same single-precision shuffles can be applied whatever type VectorSIMD::v
// has; the bit pattern is moved around, never converted.
template < typename VectorSIMD >
inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) {
  union {
    __m256 f;
    decltype(VectorSIMD::v) v;
  } pun;

  pun.v = b.v;

  if (perm == 0) {
    // Exchange the low and high 128-bit lanes.
    pun.f = _mm256_permute2f128_ps(pun.f, pun.f, 0x01);
  } else if (perm == 1) {
    // Within each lane: elements (0,1,2,3) -> (2,3,0,1).
    pun.f = _mm256_shuffle_ps(pun.f, pun.f, _MM_SHUFFLE(1,0,3,2));
  } else if (perm == 2) {
    // Within each lane: elements (0,1,2,3) -> (1,0,3,2).
    pun.f = _mm256_shuffle_ps(pun.f, pun.f, _MM_SHUFFLE(2,3,0,1));
  } else if (perm != 3) {
    // perm == 3 is deliberately empty for AVX1/2; anything else is a bug.
    assert(0);
  }

  y.v = pun.v;
}
// Function name aliases
typedef Optimization::Vsplat VsplatSIMD;