mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Patches for knl
This commit is contained in:
parent
42fb49d3fd
commit
b9113ed310
@ -343,11 +343,12 @@ namespace Optimization {
|
|||||||
|
|
||||||
struct PrecisionChange {
|
struct PrecisionChange {
|
||||||
static inline __m512i StoH (__m512 a,__m512 b) {
|
static inline __m512i StoH (__m512 a,__m512 b) {
|
||||||
|
__m512i h;
|
||||||
#ifdef USE_FP16
|
#ifdef USE_FP16
|
||||||
__m256i ha = _mm512_cvtps_ph(a,0);
|
__m256i ha = _mm512_cvtps_ph(a,0);
|
||||||
__m256i hb = _mm512_cvtps_ph(b,0);
|
__m256i hb = _mm512_cvtps_ph(b,0);
|
||||||
__m512 h = _mm512_castps256_ps512(ha);
|
h =(__m512i) _mm512_castps256_ps512(ha);
|
||||||
h = _mm512_insertf256_ps(h,hb,1);
|
h =(__m512i) _mm512_insertf64x4((__m512d)h,(__m512d)hb,1);
|
||||||
#else
|
#else
|
||||||
assert(0);
|
assert(0);
|
||||||
#endif
|
#endif
|
||||||
@ -365,12 +366,12 @@ namespace Optimization {
|
|||||||
__m256 sa = _mm512_cvtpd_ps(a);
|
__m256 sa = _mm512_cvtpd_ps(a);
|
||||||
__m256 sb = _mm512_cvtpd_ps(b);
|
__m256 sb = _mm512_cvtpd_ps(b);
|
||||||
__m512 s = _mm512_castps256_ps512(sa);
|
__m512 s = _mm512_castps256_ps512(sa);
|
||||||
s = _mm512_insertf256_ps(s,sb,1);
|
s =(__m512) _mm512_insertf64x4((__m512d)s,(__m256d)sb,1);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
|
static inline void StoD (__m512 s,__m512d &a,__m512d &b) {
|
||||||
a = _mm512_cvtps_pd(_mm512_extractf256_ps(s,0));
|
a = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,0));
|
||||||
b = _mm512_cvtps_pd(_mm512_extractf256_ps(s,1));
|
b = _mm512_cvtps_pd((__m256)_mm512_extractf64x4_pd((__m512d)s,1));
|
||||||
}
|
}
|
||||||
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
|
static inline __m512i DtoH (__m512d a,__m512d b,__m512d c,__m512d d) {
|
||||||
__m512 sa,sb;
|
__m512 sa,sb;
|
||||||
@ -581,7 +582,9 @@ namespace Optimization {
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
|
|
||||||
typedef __m512 SIMD_Ftype; // Single precision type
|
|
||||||
|
typedef __m512i SIMD_Htype; // Half precision type (held in an integer vector)
|
||||||
|
typedef __m512 SIMD_Ftype; // Single precision type
|
||||||
typedef __m512d SIMD_Dtype; // Double precision type
|
typedef __m512d SIMD_Dtype; // Double precision type
|
||||||
typedef __m512i SIMD_Itype; // Integer type
|
typedef __m512i SIMD_Itype; // Integer type
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user