From 8b6541fb605ffd0b7a742376c1d4689da9530e23 Mon Sep 17 00:00:00 2001
From: gfilaci
Date: Fri, 29 Mar 2019 16:43:31 +0000
Subject: [PATCH] Fix gpu MultRealPart and MaddRealPart bug

---
 Grid/simd/Grid_gpu.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Grid/simd/Grid_gpu.h b/Grid/simd/Grid_gpu.h
index 2f7d47ec..6dc5123a 100644
--- a/Grid/simd/Grid_gpu.h
+++ b/Grid/simd/Grid_gpu.h
@@ -245,18 +245,18 @@ namespace Optimization {
   struct MultRealPart{
     accelerator_inline float4 operator()(float4 a, float4 b){
       float4 ymm0;
-      ymm0.x = a.y;
-      ymm0.y = a.y;
-      ymm0.z = a.w;
-      ymm0.w = a.w;
+      ymm0.x = a.x;
+      ymm0.y = a.x;
+      ymm0.z = a.z;
+      ymm0.w = a.z;
       return ymm0*b;
       // ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
       // return _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
     }
     accelerator_inline double2 operator()(double2 a, double2 b){
       double2 ymm0;
-      ymm0.x = a.y;
-      ymm0.y = a.y;
+      ymm0.x = a.x;
+      ymm0.y = a.x;
       return ymm0*b;
       // ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
       // return _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
@@ -265,17 +265,17 @@ namespace Optimization {
   struct MaddRealPart{
     accelerator_inline float4 operator()(float4 a, float4 b, float4 c){
       float4 ymm0; // = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar,
-      ymm0.x = a.y;
-      ymm0.y = a.y;
-      ymm0.z = a.w;
-      ymm0.w = a.w;
+      ymm0.x = a.x;
+      ymm0.y = a.x;
+      ymm0.z = a.z;
+      ymm0.w = a.z;
       return c+ymm0*b;
     }
     accelerator_inline double2 operator()(double2 a, double2 b, double2 c){
       // ymm0 = _mm_shuffle_pd( a, a, 0x0 );
       double2 ymm0;
-      ymm0.x = a.y;
-      ymm0.y = a.y;
+      ymm0.x = a.x;
+      ymm0.y = a.x;
       return c+ymm0*b;
     }
   };