From 661fc4d3d1f2555bf58f5c5dd2ef876435a0cc37 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 20 Feb 2017 17:48:36 -0500 Subject: [PATCH] Debug AVX512 exchange code paths --- lib/simd/Grid_avx512.h | 6 ++++++ lib/simd/Grid_vector_types.h | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 22d45aeb..f39c4033 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -355,6 +355,8 @@ namespace Optimization { static inline void Exchange1(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ out1= _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); out2= _mm512_shuffle_f32x4(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + out1= _mm512_shuffle_f32x4(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm512_shuffle_f32x4(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ }; static inline void Exchange2(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ out1= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(1,0,1,0)); @@ -363,6 +365,8 @@ namespace Optimization { static inline void Exchange3(__m512 &out1,__m512 &out2,__m512 in1,__m512 in2){ out1= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); out2= _mm512_shuffle_ps(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + out1= _mm512_shuffle_ps(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm512_shuffle_ps(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ }; static inline void Exchange0(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ @@ -372,6 +376,8 @@ namespace Optimization { static inline void Exchange1(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ out1= _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(2,0,2,0)); out2= _mm512_shuffle_f64x2(in1,in2,_MM_SELECT_FOUR_FOUR(3,1,3,1)); + out1= _mm512_shuffle_f64x2(out1,out1,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ + out2= _mm512_shuffle_f64x2(out2,out2,_MM_SELECT_FOUR_FOUR(3,1,2,0)); /*AECG*/ }; static inline void Exchange2(__m512d &out1,__m512d &out2,__m512d in1,__m512d in2){ out1 = _mm512_shuffle_pd(in1,in2,0x00); diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index cd499d88..dcdacbe0 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -356,10 +356,19 @@ class Grid_simd { /////////////////////// friend inline void exchange(Grid_simd &out1,Grid_simd &out2,Grid_simd in1,Grid_simd in2,int n) { - if (n==3) Optimization::Exchange::Exchange3(out1.v,out2.v,in1.v,in2.v); - else if(n==2) Optimization::Exchange::Exchange2(out1.v,out2.v,in1.v,in2.v); - else if(n==1) Optimization::Exchange::Exchange1(out1.v,out2.v,in1.v,in2.v); - else if(n==0) Optimization::Exchange::Exchange0(out1.v,out2.v,in1.v,in2.v); + if (n==3) { + Optimization::Exchange::Exchange3(out1.v,out2.v,in1.v,in2.v); + // std::cout << " Exchange3 "<< out1<<" "<< out2<<" <- " << in1 << " "<