mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Improvements to avx for invertible to avoid latent bug
This commit is contained in:
parent
8a29c16bde
commit
f246fe3304
@ -474,16 +474,31 @@ namespace Optimization {
|
|||||||
struct Exchange{
|
struct Exchange{
|
||||||
// 3210 ordering
|
// 3210 ordering
|
||||||
static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
||||||
|
//Invertible
|
||||||
|
//AB CD -> AC BD
|
||||||
|
//AC BD -> AB CD
|
||||||
out1= _mm256_permute2f128_ps(in1,in2,0x20);
|
out1= _mm256_permute2f128_ps(in1,in2,0x20);
|
||||||
out2= _mm256_permute2f128_ps(in1,in2,0x31);
|
out2= _mm256_permute2f128_ps(in1,in2,0x31);
|
||||||
};
|
};
|
||||||
static inline void Exchange1(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
static inline void Exchange1(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
||||||
|
//Invertible
|
||||||
|
// ABCD EFGH ->ABEF CDGH
|
||||||
|
// ABEF CDGH ->ABCD EFGH
|
||||||
out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0));
|
out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0));
|
||||||
out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2));
|
out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2));
|
||||||
};
|
};
|
||||||
static inline void Exchange2(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
static inline void Exchange2(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
||||||
out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
|
// Invertible ?
|
||||||
out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
|
// ABCD EFGH -> ACEG BDFH
|
||||||
|
// ACEG BDFH -> AEBF CGDH
|
||||||
|
// out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
|
||||||
|
// out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
|
||||||
|
// The two-shuffle form above is not its own inverse; need
|
||||||
|
// AECG BFDH -> ABCD EFGH
|
||||||
|
out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); /*ACEG*/
|
||||||
|
out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); /*BDFH*/
|
||||||
|
out1= _mm256_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
|
||||||
|
out2= _mm256_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*BFDH*/
|
||||||
};
|
};
|
||||||
static inline void Exchange3(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
static inline void Exchange3(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -419,8 +419,10 @@ void ExchangeTester(const functor &func)
|
|||||||
assert(found==1);
|
assert(found==1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// for(int i=0;i<Nsimd;i++){
|
for(int i=0;i<Nsimd;i++){
|
||||||
// std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl;
|
assert(test1[i]==input1[i]);
|
||||||
|
assert(test2[i]==input2[i]);
|
||||||
|
}// std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl;
|
||||||
// std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl;
|
// std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl;
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user