mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-12 20:27:06 +01:00
Corrected AVX regression error. Tested.
This commit is contained in:
@ -299,7 +299,7 @@ namespace Optimization {
|
||||
//////////////////////////////////////////////
|
||||
// Some Template specialization
|
||||
template < typename vtype >
|
||||
- void permute(vtype a, vtype b, int perm) {
+ void permute(vtype &a, vtype &b, int perm) {
|
||||
union {
|
||||
__m256 f;
|
||||
vtype v;
|
||||
@ -320,11 +320,16 @@ namespace Optimization {
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){
|
||||
__m256 v1,v2;
|
||||
+ union {
+ __m256 v;
+ float f[8];
+ } conv;
|
||||
Optimization::permute(v1,in,0); // sse 128; paired complex single
|
||||
v1 = _mm256_add_ps(v1,in);
|
||||
Optimization::permute(v2,v1,1); // avx 256; quad complex single
|
||||
v1 = _mm256_add_ps(v1,v2);
|
||||
- return Grid::ComplexF(v1[0],v1[1]);
+ conv.v = v1;
+ return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||
}
|
||||
//Real float Reduce
|
||||
template<>
|
||||
|
@ -77,9 +77,7 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
|
||||
switch (perm){
|
||||
#if defined(AVX1)||defined(AVX2)
|
||||
// 8x32 bits=>3 permutes
|
||||
- case 2:
- conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1));
- break;
+ case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
|
||||
case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
|
||||
case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user