1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Patches for knl

This commit is contained in:
Peter Boyle 2017-04-13 12:02:12 -04:00
parent 42fb49d3fd
commit b9113ed310

View File

@ -343,11 +343,12 @@ namespace Optimization {
struct PrecisionChange {
static inline __m512i StoH (__m512 a,__m512 b) {
__m512i h;
#ifdef USE_FP16
__m256i ha = _mm512_cvtps_ph(a,0);
__m256i hb = _mm512_cvtps_ph(b,0);
__m512 h = _mm512_castps256_ps512(ha);
h = _mm512_insertf256_ps(h,hb,1);
h =(__m512i) _mm512_castps256_ps512(ha);
h =(__m512i) _mm512_insertf64x4((__m512d)h,(__m512d)hb,1);
#else
assert(0);
#endif
@ -365,12 +366,12 @@ namespace Optimization {
__m256 sa = _mm512_cvtpd_ps(a);
__m256 sb = _mm512_cvtpd_ps(b);
__m512 s = _mm512_castps256_ps512(sa);
s = _mm512_insertf256_ps(s,sb,1);
s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1);
return s;
}
// Converts one single-precision vector s into two double-precision vectors:
// a is produced from the 256-bit portion of s at index 0, b from the portion at index 1.
// NOTE(review): this text looks like a rendered diff without +/- markers. The two
// _mm512_extractf256_ps assignments appear to be the pre-patch form and the two
// _mm512_extractf64x4_pd assignments their replacement -- confirm against the
// repository before treating all four assignments as live code.
static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
a = _mm512_cvtps_pd(_mm512_extractf256_ps(s,0));
b = _mm512_cvtps_pd(_mm512_extractf256_ps(s,1));
// s is cast to __m512d so the f64x4 extract can be used; the extracted value is
// cast back to __m256 before the float-to-double conversion.
a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0));
b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1));
}
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
__m512 sa,sb;
@ -581,6 +582,8 @@ namespace Optimization {
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
typedef __m512i SIMD_Htype; // Half precision type (held in an integer vector)
typedef __m512 SIMD_Ftype; // Single precision type
typedef __m512d SIMD_Dtype; // Double precision type
typedef __m512i SIMD_Itype; // Integer type