1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 17:25:37 +01:00

Patches for knl

This commit is contained in:
Peter Boyle 2017-04-13 12:02:12 -04:00
parent 42fb49d3fd
commit b9113ed310

View File

@ -343,11 +343,12 @@ namespace Optimization {
struct PrecisionChange { struct PrecisionChange {
static inline __m512i StoH (__m512 a,__m512 b) { static inline __m512i StoH (__m512 a,__m512 b) {
__m512i h;
#ifdef USE_FP16 #ifdef USE_FP16
__m256i ha = _mm512_cvtps_ph(a,0); __m256i ha = _mm512_cvtps_ph(a,0);
__m256i hb = _mm512_cvtps_ph(b,0); __m256i hb = _mm512_cvtps_ph(b,0);
__m512 h = _mm512_castps256_ps512(ha); h =(__m512i) _mm512_castps256_ps512(ha);
h = _mm512_insertf256_ps(h,hb,1); h =(__m512i) _mm512_insertf64x4((__m512d)h,(__m512d)hb,1);
#else #else
assert(0); assert(0);
#endif #endif
@ -365,12 +366,12 @@ namespace Optimization {
__m256 sa = _mm512_cvtpd_ps(a); __m256 sa = _mm512_cvtpd_ps(a);
__m256 sb = _mm512_cvtpd_ps(b); __m256 sb = _mm512_cvtpd_ps(b);
__m512 s = _mm512_castps256_ps512(sa); __m512 s = _mm512_castps256_ps512(sa);
s = _mm512_insertf256_ps(s,sb,1); s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1);
return s; return s;
} }
// StoD: widen one 512-bit single-precision vector into two double-precision vectors.
// The 256-bit half of s selected by index 0 is converted into a, and the half
// selected by index 1 is converted into b.
// NOTE(review): each row below appears to hold the before/after columns of the diff:
// the _mm512_extractf256_ps form, and its replacement, which casts s to __m512d,
// extracts with _mm512_extractf64x4_pd and casts the result back to __m256.
static inline void StoD (__m512 s,__m512d &a,__m512d &b) { static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
a = _mm512_cvtps_pd(_mm512_extractf256_ps(s,0)); a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0));
b = _mm512_cvtps_pd(_mm512_extractf256_ps(s,1)); b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1));
} }
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) { static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
__m512 sa,sb; __m512 sa,sb;
@ -581,7 +582,9 @@ namespace Optimization {
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
typedef __m512 SIMD_Ftype; // Single precision type
typedef __m512i SIMD_Htype; // Half precision type
typedef __m512 SIMD_Ftype; // Single precision type
typedef __m512d SIMD_Dtype; // Double precision type typedef __m512d SIMD_Dtype; // Double precision type
typedef __m512i SIMD_Itype; // Integer type typedef __m512i SIMD_Itype; // Integer type