mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Improvements to the AVX Exchange routines to make them invertible, avoiding a latent bug
This commit is contained in:
		@@ -474,16 +474,31 @@ namespace Optimization {
 | 
				
			|||||||
  struct Exchange{
 | 
					  struct Exchange{
 | 
				
			||||||
    // 3210 ordering
 | 
					    // 3210 ordering
 | 
				
			||||||
    static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
					    static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
				
			||||||
 | 
					      //Invertible
 | 
				
			||||||
 | 
					      //AB CD ->  AC BD
 | 
				
			||||||
 | 
					      //AC BD ->  AB CD
 | 
				
			||||||
      out1= _mm256_permute2f128_ps(in1,in2,0x20);
 | 
					      out1= _mm256_permute2f128_ps(in1,in2,0x20);
 | 
				
			||||||
      out2= _mm256_permute2f128_ps(in1,in2,0x31);
 | 
					      out2= _mm256_permute2f128_ps(in1,in2,0x31);
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    static inline void Exchange1(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
					    static inline void Exchange1(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
				
			||||||
 | 
					      //Invertible
 | 
				
			||||||
 | 
					      // ABCD EFGH  ->ABEF CDGH
 | 
				
			||||||
 | 
					      // ABEF CDGH  ->ABCD EFGH
 | 
				
			||||||
      out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0));
 | 
					      out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0));
 | 
				
			||||||
      out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2));
 | 
					      out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,2,3,2));
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    static inline void Exchange2(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
					    static inline void Exchange2(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
				
			||||||
      out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
 | 
					      // Invertible ? 
 | 
				
			||||||
      out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
 | 
					      // ABCD EFGH -> ACEG BDFH
 | 
				
			||||||
 | 
					      // ACEG BDFH -> AEBF CGDH
 | 
				
			||||||
 | 
					      //      out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
 | 
				
			||||||
 | 
					      //      out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
 | 
				
			||||||
 | 
					      // That form is not its own inverse; need 
 | 
				
			||||||
 | 
					      // AECG BFDH -> ABCD EFGH
 | 
				
			||||||
 | 
					      out1= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); /*ACEG*/
 | 
				
			||||||
 | 
					      out2= _mm256_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); /*BDFH*/
 | 
				
			||||||
 | 
					      out1= _mm256_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
 | 
				
			||||||
 | 
					      out2= _mm256_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*BFDH*/
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    static inline void Exchange3(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
					    static inline void Exchange3(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
 | 
				
			||||||
      assert(0);
 | 
					      assert(0);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -419,8 +419,10 @@ void ExchangeTester(const functor &func)
 | 
				
			|||||||
    assert(found==1);
 | 
					    assert(found==1);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //  for(int i=0;i<Nsimd;i++){
 | 
					  for(int i=0;i<Nsimd;i++){
 | 
				
			||||||
    //    std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl;
 | 
					    assert(test1[i]==input1[i]);
 | 
				
			||||||
 | 
					    assert(test2[i]==input2[i]);
 | 
				
			||||||
 | 
					  }//    std::cout << " i "<< i<<" test1"<<test1[i]<<" "<<input1[i]<<std::endl;
 | 
				
			||||||
    //    std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl;
 | 
					    //    std::cout << " i "<< i<<" test2"<<test2[i]<<" "<<input2[i]<<std::endl;
 | 
				
			||||||
  //  }
 | 
					  //  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user