mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Debug AVX512 exchange code paths
This commit is contained in:
		@@ -355,6 +355,8 @@ namespace Optimization {
 | 
			
		||||
    // Exchange1, single precision: two-stage permute built on _mm512_shuffle_f32x4.
    //   in1, in2   : source vectors; taken by value, so writing the outputs
    //                cannot disturb them even if the caller aliases them.
    //   out1, out2 : results, written through the references.
    // Stage 1 selects from the (in1,in2) pair with (2,0,2,0) and (3,1,3,1);
    // stage 2 re-permutes each intermediate against itself with (3,1,2,0).
    // NOTE(review): the AECG tag looks like the resulting block order when
    // in1=ABCD and in2=EFGH, assuming _MM_SELECT_FOUR_FOUR packs its arguments
    // most-significant-first like _MM_SHUFFLE -- confirm against the macro.
    static inline void Exchange1(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){
      const __m512 pick_20 = _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
      const __m512 pick_31 = _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
      out1 = _mm512_shuffle_f32x4(pick_20,pick_20,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
      out2 = _mm512_shuffle_f32x4(pick_31,pick_31,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
    }
 | 
			
		||||
    static inline void Exchange2(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){
 | 
			
		||||
      out1= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0));
 | 
			
		||||
@@ -363,6 +365,8 @@ namespace Optimization {
 | 
			
		||||
    // Exchange3, single precision: two-stage permute built on _mm512_shuffle_ps.
    //   in1, in2   : source vectors; taken by value, so writing the outputs
    //                cannot disturb them even if the caller aliases them.
    //   out1, out2 : results, written through the references.
    // Stage 1 selects from the (in1,in2) pair with (2,0,2,0) and (3,1,3,1);
    // stage 2 re-permutes each intermediate against itself with (3,1,2,0).
    // NOTE(review): the AECG tag looks like the resulting element order when
    // in1=ABCD and in2=EFGH, assuming _MM_SELECT_FOUR_FOUR packs its arguments
    // most-significant-first like _MM_SHUFFLE -- confirm against the macro.
    static inline void Exchange3(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){
      const __m512 pick_20 = _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
      const __m512 pick_31 = _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
      out1 = _mm512_shuffle_ps(pick_20,pick_20,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
      out2 = _mm512_shuffle_ps(pick_31,pick_31,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
    }
 | 
			
		||||
 
 | 
			
		||||
    static inline void Exchange0(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){
 | 
			
		||||
@@ -372,6 +376,8 @@ namespace Optimization {
 | 
			
		||||
    // Exchange1, double precision: two-stage permute built on _mm512_shuffle_f64x2.
    //   in1, in2   : source vectors; taken by value, so writing the outputs
    //                cannot disturb them even if the caller aliases them.
    //   out1, out2 : results, written through the references.
    // Stage 1 selects from the (in1,in2) pair with (2,0,2,0) and (3,1,3,1);
    // stage 2 re-permutes each intermediate against itself with (3,1,2,0).
    // NOTE(review): the AECG tag looks like the resulting block order when
    // in1=ABCD and in2=EFGH, assuming _MM_SELECT_FOUR_FOUR packs its arguments
    // most-significant-first like _MM_SHUFFLE -- confirm against the macro.
    static inline void Exchange1(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){
      const __m512d pick_20 = _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0));
      const __m512d pick_31 = _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1));
      out1 = _mm512_shuffle_f64x2(pick_20,pick_20,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
      out2 = _mm512_shuffle_f64x2(pick_31,pick_31,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/
    }
 | 
			
		||||
    static inline void Exchange2(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){
 | 
			
		||||
      out1 = _mm512_shuffle_pd(in1,in2,0x00);
 | 
			
		||||
 
 | 
			
		||||
@@ -356,10 +356,19 @@ class Grid_simd {
 | 
			
		||||
  ///////////////////////
 | 
			
		||||
  // Dispatch to the Optimization::Exchange kernel selected by n.
  //   out1, out2 : receive the kernel results (their .v members are overwritten).
  //   in1, in2   : operands, passed by value, so the kernel reads copies even
  //                when the caller passes the same objects as outputs.
  //   n          : 0..3 selects Exchange0..Exchange3; any other value leaves
  //                out1 and out2 untouched.
  // The kernel is invoked exactly once per call. A stale unbraced if/else chain
  // used to precede this dispatch and ran the same kernel a second time with
  // identical inputs, doubling the work without changing the result.
  friend inline void exchange(Grid_simd &out1,Grid_simd &out2,Grid_simd in1,Grid_simd in2,int n)
  {
    switch (n) {
    case 3:
      Optimization::Exchange::Exchange3(out1.v,out2.v,in1.v,in2.v);
      //      std::cout << " Exchange3 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl;
      break;
    case 2:
      Optimization::Exchange::Exchange2(out1.v,out2.v,in1.v,in2.v);
      //      std::cout << " Exchange2 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl;
      break;
    case 1:
      Optimization::Exchange::Exchange1(out1.v,out2.v,in1.v,in2.v);
      //      std::cout << " Exchange1 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl;
      break;
    case 0:
      Optimization::Exchange::Exchange0(out1.v,out2.v,in1.v,in2.v);
      //      std::cout << " Exchange0 "<< out1<<" "<< out2<<" <- " << in1 << " "<<in2<<std::endl;
      break;
    default:
      // Unrecognised n: no kernel is run, matching the previous behaviour.
      break;
    }
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user