1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-07-31 19:57:07 +01:00

Half precision conversion

This commit is contained in:
paboyle
2017-04-12 19:32:37 +01:00
parent b12dc89d26
commit cb6b81ae82
6 changed files with 261 additions and 20 deletions

View File

@@ -471,6 +471,42 @@ namespace Optimization {
};
};
struct PrecisionChange {
  // Conversions between half (H), single (S) and double (D) precision
  // SIMD registers. Narrowing packs several wide registers into one
  // narrow register; widening unpacks one narrow register into several.
  // In every routine the first argument/output maps to the low 128-bit
  // lane and the next one to the high 128-bit lane.

  // Single -> half: convert the 8 floats of `a` and the 8 floats of `b`
  // to 16-bit halves; a's results fill the low lane, b's the high lane.
  // The halves are carried in an integer register, so the integer-typed
  // cast/insert intrinsics are used (no float<->int vector reinterpretation).
  static inline __m256i StoH (__m256 a,__m256 b) {
    // Rounding immediate 0: round to nearest (see Intel docs for imm8).
    __m128i lo = _mm256_cvtps_ph(a,0);
    __m128i hi = _mm256_cvtps_ph(b,0);
    __m256i h  = _mm256_castsi128_si256(lo);   // upper lane filled next
    return _mm256_insertf128_si256(h,hi,1);
  }

  // Half -> single: inverse of StoH. The low lane of `h` expands into
  // `sa`, the high lane into `sb`.
  static inline void HtoS (__m256i h,__m256 &sa,__m256 &sb) {
    sa = _mm256_cvtph_ps(_mm256_extractf128_si256(h,0));
    sb = _mm256_cvtph_ps(_mm256_extractf128_si256(h,1));
  }

  // Double -> single: convert the 4 doubles of `a` and the 4 doubles of
  // `b` to floats; a's results fill the low lane, b's the high lane.
  static inline __m256 DtoS (__m256d a,__m256d b) {
    __m128 lo = _mm256_cvtpd_ps(a);
    __m128 hi = _mm256_cvtpd_ps(b);
    __m256 s  = _mm256_castps128_ps256(lo);    // upper lane filled next
    return _mm256_insertf128_ps(s,hi,1);
  }

  // Single -> double: inverse of DtoS. The low lane of `s` expands into
  // `a`, the high lane into `b`.
  static inline void StoD (__m256 s,__m256d &a,__m256d &b) {
    a = _mm256_cvtps_pd(_mm256_extractf128_ps(s,0));
    b = _mm256_cvtps_pd(_mm256_extractf128_ps(s,1));
  }

  // Double -> half via single precision: (a,b) and (c,d) are each
  // narrowed with DtoS, then the two float registers are narrowed with
  // StoH. Takes four double registers and returns the packed-half
  // register, mirroring HtoD below.
  static inline __m256i DtoH (__m256d a,__m256d b,__m256d c,__m256d d) {
    __m256 sab = DtoS(a,b);
    __m256 scd = DtoS(c,d);
    return StoH(sab,scd);
  }

  // Half -> double via single precision: inverse of DtoH.
  static inline void HtoD (__m256i h,__m256d &a,__m256d &b,__m256d &c,__m256d &d) {
    __m256 sab,scd;
    HtoS(h,sab,scd);
    StoD(sab,a,b);
    StoD(scd,c,d);
  }
};
struct Exchange{
// 3210 ordering
static inline void Exchange0(__m256 &out1,__m256 &out2,__m256 in1,__m256 in2){
@@ -675,6 +711,7 @@ namespace Optimization {
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types
typedef __m256i SIMD_Htype; // Half precision type (16-bit values carried in an integer register)
typedef __m256 SIMD_Ftype; // Single precision type
typedef __m256d SIMD_Dtype; // Double precision type
typedef __m256i SIMD_Itype; // Integer type