From 249165d1b29c3fc38f6f85655a3051dba7ded578 Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Tue, 5 May 2015 18:09:28 +0100
Subject: [PATCH] Added streaming stores

---
Review notes (commentary only; this free-text area between the three-dash
line and the first diff is not part of the change itself):

* Adds a friend function vstream(out, in) to vComplexD, vComplexF, vRealD,
  vRealF and vInteger.
* In the four floating-point classes each ISA guard (AVX1/AVX2, SSE4, AVX512)
  calls the matching _mm256_/_mm_/_mm512_ stream_pd or stream_ps intrinsic,
  passing the address of out.v as destination and in.v as the value. The QPX
  guard is assert(0).
* The guards are independent #if/#ifdef blocks with no #else branch, so when
  none of AVX1, AVX2, SSE4, AVX512 or QPX is defined the new function body is
  empty and `out` is left unmodified.
* vInteger's vstream is a plain `out=in` assignment in every configuration.
* The "//Note v has a3 a2 a1 a0" line under AVX512 is the same text that
  appears in the vstore body shown in the Grid_vComplexF.h hunk.
* In the Grid_vComplexF.h hunk every removed vstore line is paired with an
  added line carrying the same tokens; presumably an indentation-only change,
  confirm against the original commit.
* NOTE(review): Intel documents the stream intrinsics as requiring a
  destination aligned to the vector width. Presumably the member `v` has that
  alignment; confirm against the class declarations, which this patch does
  not show.
* NOTE(review): line breaks and indentation in this copy were reconstructed
  from a whitespace-collapsed rendering. The blank context lines in the
  Grid_vComplexF.h and Grid_vInteger.h hunks are inferred from the hunk line
  counts (TODO confirm). The Grid_vRealF.h hunk shows only four of its six
  declared context lines in the portion reviewed.

 lib/simd/Grid_vComplexD.h | 15 +++++++++++++++
 lib/simd/Grid_vComplexF.h | 29 ++++++++++++++++++++++-------
 lib/simd/Grid_vInteger.h  |  4 ++++
 lib/simd/Grid_vRealD.h    | 15 +++++++++++++++
 lib/simd/Grid_vRealF.h    | 15 +++++++++++++++
 5 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/lib/simd/Grid_vComplexD.h b/lib/simd/Grid_vComplexD.h
index 208e2640..f0108d59 100644
--- a/lib/simd/Grid_vComplexD.h
+++ b/lib/simd/Grid_vComplexD.h
@@ -242,6 +242,21 @@ friend inline void vstore(const vComplexD &ret, ComplexD *a){
     assert(0);
 #endif
   }
+  friend inline void vstream(vComplexD &out,const vComplexD &in){
+#if defined (AVX1)|| defined (AVX2)
+    _mm256_stream_pd((double *)&out.v,in.v);
+#endif
+#ifdef SSE4
+    _mm_stream_pd((double *)&out.v,in.v);
+#endif
+#ifdef AVX512
+    _mm512_stream_pd((double *)&out.v,in.v);
+    //Note v has a3 a2 a1 a0
+#endif
+#ifdef QPX
+    assert(0);
+#endif
+  }
   friend inline void vprefetch(const vComplexD &v)
   {
     _mm_prefetch((const char*)&v.v,_MM_HINT_T0);
diff --git a/lib/simd/Grid_vComplexF.h b/lib/simd/Grid_vComplexF.h
index 9c7922e3..5f52fc53 100644
--- a/lib/simd/Grid_vComplexF.h
+++ b/lib/simd/Grid_vComplexF.h
@@ -176,21 +176,36 @@ namespace Grid {
     vsplat(ret,a,b);
   }
 
-friend inline void vstore(const vComplexF &ret, ComplexF *a){
+  friend inline void vstore(const vComplexF &ret, ComplexF *a){
 #if defined (AVX1)|| defined (AVX2)
- _mm256_store_ps((float *)a,ret.v);
+    _mm256_store_ps((float *)a,ret.v);
 #endif
 #ifdef SSE4
- _mm_store_ps((float *)a,ret.v);
+    _mm_store_ps((float *)a,ret.v);
 #endif
 #ifdef AVX512
- _mm512_store_ps((float *)a,ret.v);
-//Note v has a3 a2 a1 a0
+    _mm512_store_ps((float *)a,ret.v);
+    //Note v has a3 a2 a1 a0
 #endif
 #ifdef QPX
- assert(0);
+    assert(0);
 #endif
-}
+  }
+  friend inline void vstream(vComplexF &out,const vComplexF &in){
+#if defined (AVX1)|| defined (AVX2)
+    _mm256_stream_ps((float *)&out.v,in.v);
+#endif
+#ifdef SSE4
+    _mm_stream_ps((float *)&out.v,in.v);
+#endif
+#ifdef AVX512
+    _mm512_stream_ps((float *)&out.v,in.v);
+    //Note v has a3 a2 a1 a0
+#endif
+#ifdef QPX
+    assert(0);
+#endif
+  }
   friend inline void vprefetch(const vComplexF &v)
   {
     _mm_prefetch((const char*)&v.v,_MM_HINT_T0);
diff --git a/lib/simd/Grid_vInteger.h b/lib/simd/Grid_vInteger.h
index 6035aea1..5a429a9c 100644
--- a/lib/simd/Grid_vInteger.h
+++ b/lib/simd/Grid_vInteger.h
@@ -186,6 +186,10 @@ namespace Grid {
 #endif
   }
 
+  friend inline void vstream(vInteger & out,const vInteger &in){
+    out=in;
+  }
+
   friend inline void vprefetch(const vInteger &v)
   {
     _mm_prefetch((const char*)&v.v,_MM_HINT_T0);
diff --git a/lib/simd/Grid_vRealD.h b/lib/simd/Grid_vRealD.h
index 0bdaa5e4..ee51fa03 100644
--- a/lib/simd/Grid_vRealD.h
+++ b/lib/simd/Grid_vRealD.h
@@ -174,6 +174,21 @@ namespace Grid {
 #endif
 #ifdef QPX
     assert(0);
+#endif
+  }
+  friend inline void vstream(vRealD &out,const vRealD &in){
+#if defined (AVX1)|| defined (AVX2)
+    _mm256_stream_pd((double *)&out.v,in.v);
+#endif
+#ifdef SSE4
+    _mm_stream_pd((double *)&out.v,in.v);
+#endif
+#ifdef AVX512
+    _mm512_stream_pd((double *)&out.v,in.v);
+    //Note v has a3 a2 a1 a0
+#endif
+#ifdef QPX
+    assert(0);
 #endif
   }
   friend inline void vprefetch(const vRealD &v)
diff --git a/lib/simd/Grid_vRealF.h b/lib/simd/Grid_vRealF.h
index e8d89cec..48e5c134 100644
--- a/lib/simd/Grid_vRealF.h
+++ b/lib/simd/Grid_vRealF.h
@@ -208,6 +208,21 @@ friend inline void vstore(const vRealF &ret, float *a){
     assert(0);
 #endif
   }
+  friend inline void vstream(vRealF &out,const vRealF &in){
+#if defined (AVX1)|| defined (AVX2)
+    _mm256_stream_ps((float *)&out.v,in.v);
+#endif
+#ifdef SSE4
+    _mm_stream_ps((float *)&out.v,in.v);
+#endif
+#ifdef AVX512
+    _mm512_stream_ps((float *)&out.v,in.v);
+    //Note v has a3 a2 a1 a0
+#endif
+#ifdef QPX
+    assert(0);
+#endif
+  }
   friend inline void vprefetch(const vRealF &v)