diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 72a2ec9a..eb76427e 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -333,73 +333,6 @@ struct Permute{ #ifdef SFW_FP16 -struct Grid_half { - Grid_half(){} - Grid_half(uint16_t raw) : x(raw) {} - uint16_t x; -}; -union FP32 { - unsigned int u; - float f; -}; - -// PAB - Lifted and adapted from Eigen, which is GPL V2 -inline float sfw_half_to_float(Grid_half h) { - const FP32 magic = { 113 << 23 }; - const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift - FP32 o; - o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits - unsigned int exp = shifted_exp & o.u; // just the exponent - o.u += (127 - 15) << 23; // exponent adjust - // handle exponent special cases - if (exp == shifted_exp) { // Inf/NaN? - o.u += (128 - 16) << 23; // extra exp adjust - } else if (exp == 0) { // Zero/Denormal? - o.u += 1 << 23; // extra exp adjust - o.f -= magic.f; // renormalize - } - o.u |= (h.x & 0x8000) << 16; // sign bit - return o.f; -} -inline Grid_half sfw_float_to_half(float ff) { - FP32 f; f.f = ff; - const FP32 f32infty = { 255 << 23 }; - const FP32 f16max = { (127 + 16) << 23 }; - const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; - unsigned int sign_mask = 0x80000000u; - Grid_half o; - - o.x = static_cast(0x0u); - unsigned int sign = f.u & sign_mask; - f.u ^= sign; - // NOTE all the integer compares in this function can be safely - // compiled into signed compares since all operands are below - // 0x80000000. Important if you want fast straight SSE2 code - // (since there's no unsigned PCMPGTD). - if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set) - o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf - } else { // (De)normalized number or zero - if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero - // use a magic value to align our 10 mantissa bits at the bottom of - // the float. as long as FP addition is round-to-nearest-even this - // just works. - f.f += denorm_magic.f; - // and one integer subtract of the bias later, we have our final float! - o.x = static_cast(f.u - denorm_magic.u); - } else { - unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd - - // update exponent, rounding bias part 1 - f.u += ((unsigned int)(15 - 127) << 23) + 0xfff; - // rounding bias part 2 - f.u += mant_odd; - // take the bits! - o.x = static_cast(f.u >> 13); - } - } - o.x |= static_cast(sign >> 16); - return o; -} static inline __m128i Grid_mm_cvtps_ph(__m128 f,int discard) { __m128i ret=(__m128i)_mm_setzero_ps(); float *fp = (float *)&f;