1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Move the AVXFMA4 option for the alignr macros from the AVX2 branch to the AVX1 branch

This commit is contained in:
paboyle 2016-10-11 10:03:01 +01:00
parent 8d11681aac
commit 6f408256bc

View File

@ -449,12 +449,12 @@ namespace Optimization {
}; };
#if defined (AVX2) || defined (AVXFMA4) #if defined (AVX2)
#define _mm256_alignr_epi32_grid(ret,a,b,n) ret=(__m256) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*4)%16) #define _mm256_alignr_epi32_grid(ret,a,b,n) ret=(__m256) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*4)%16)
#define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16) #define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16)
#endif #endif
#if defined (AVX1) #if defined (AVX1) || defined (AVXFMA4)
#define _mm256_alignr_epi32_grid(ret,a,b,n) { \ #define _mm256_alignr_epi32_grid(ret,a,b,n) { \
__m128 aa, bb; \ __m128 aa, bb; \
\ \
@ -484,20 +484,7 @@ namespace Optimization {
} }
#endif #endif
/*
inline std::ostream & operator << (std::ostream& stream, const __m256 a)
{
const float *p=(const float *)&a;
stream<< "{"<<p[0]<<","<<p[1]<<","<<p[2]<<","<<p[3]<<","<<p[4]<<","<<p[5]<<","<<p[6]<<","<<p[7]<<"}";
return stream;
};
inline std::ostream & operator<< (std::ostream& stream, const __m256d a)
{
const double *p=(const double *)&a;
stream<< "{"<<p[0]<<","<<p[1]<<","<<p[2]<<","<<p[3]<<"}";
return stream;
};
*/
struct Rotate{ struct Rotate{
static inline __m256 rotate(__m256 in,int n){ static inline __m256 rotate(__m256 in,int n){
@ -533,7 +520,6 @@ namespace Optimization {
} else { } else {
_mm256_alignr_epi32_grid(ret,tmp,in,n); _mm256_alignr_epi32_grid(ret,tmp,in,n);
} }
// std::cout << " align epi32 n=" <<n<<" in "<<tmp<<in<<" -> "<< ret <<std::endl;
return ret; return ret;
}; };
@ -546,14 +532,11 @@ namespace Optimization {
} else { } else {
_mm256_alignr_epi64_grid(ret,tmp,in,n); _mm256_alignr_epi64_grid(ret,tmp,in,n);
} }
// std::cout << " align epi64 n=" <<n<<" in "<<tmp<<in<<" -> "<< ret <<std::endl;
return ret; return ret;
}; };
}; };
//Complex float Reduce //Complex float Reduce
template<> template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){ inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){