1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Update to use Xcode 8.3 since -mf16c causes SIGILL

This commit is contained in:
paboyle 2017-04-13 12:22:40 +01:00
parent 2846f079e5
commit db5ea001a3
3 changed files with 9 additions and 12 deletions

View File

@@ -7,7 +7,7 @@ cache:
matrix: matrix:
include: include:
- os: osx - os: osx
osx_image: xcode7.2 osx_image: xcode8.3
compiler: clang compiler: clang
- compiler: gcc - compiler: gcc
addons: addons:

View File

@@ -86,7 +86,7 @@ esac
############### FP16 conversions ############### FP16 conversions
AC_ARG_ENABLE([fp16], AC_ARG_ENABLE([fp16],
[AC_HELP_STRING([--enable-fp16=yes|no], [enable fp16 comms])], [AC_HELP_STRING([--enable-fp16=yes|no], [enable fp16 comms])],
[ac_FP16=${enable_fp16}], [ac_FP16=no]) [ac_FP16=${enable_fp16}], [ac_FP16=yes])
case ${ac_FP16} in case ${ac_FP16} in
no) no)
;; ;;
@@ -189,8 +189,7 @@ case ${ax_cv_cxx_compiler_vendor} in
case ${ac_SIMD} in case ${ac_SIMD} in
SSE4) SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
# SIMD_FLAGS='-msse4.2 -mf16c';; SIMD_FLAGS='-msse4.2 -mf16c';;
SIMD_FLAGS='-msse4.2';;
AVX) AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics]) AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx -mf16c';; SIMD_FLAGS='-mavx -mf16c';;

View File

@@ -330,10 +330,8 @@ namespace Optimization {
}; };
#ifndef _mm_alignr_epi64 #define _my_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16)
#define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16) #define _my_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16)
#define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16)
#endif
struct PrecisionChange { struct PrecisionChange {
static inline __m128i StoH (__m128 a,__m128 b) { static inline __m128i StoH (__m128 a,__m128 b) {
@@ -350,7 +348,7 @@ namespace Optimization {
static inline void HtoS (__m128i h,__m128 &sa,__m128 &sb) { static inline void HtoS (__m128i h,__m128 &sa,__m128 &sb) {
#ifdef USE_FP16 #ifdef USE_FP16
sa = _mm_cvtph_ps(h); sa = _mm_cvtph_ps(h);
h = (__m128i)_mm_alignr_epi32((__m128i)h,(__m128i)h,2); h = (__m128i)_my_alignr_epi32((__m128i)h,(__m128i)h,2);
sb = _mm_cvtph_ps(h); sb = _mm_cvtph_ps(h);
#else #else
assert(0); assert(0);
@@ -364,7 +362,7 @@ namespace Optimization {
} }
static inline void StoD (__m128 s,__m128d &a,__m128d &b) { static inline void StoD (__m128 s,__m128d &a,__m128d &b) {
a = _mm_cvtps_pd(s); a = _mm_cvtps_pd(s);
s = (__m128)_mm_alignr_epi32((__m128i)s,(__m128i)s,2); s = (__m128)_my_alignr_epi32((__m128i)s,(__m128i)s,2);
b = _mm_cvtps_pd(s); b = _mm_cvtps_pd(s);
} }
static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) { static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) {
@@ -439,8 +437,8 @@ namespace Optimization {
} }
} }
template<int n> static inline __m128 tRotate(__m128 in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); }; template<int n> static inline __m128 tRotate(__m128 in){ return (__m128)_my_alignr_epi32((__m128i)in,(__m128i)in,n); };
template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); }; template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_my_alignr_epi64((__m128i)in,(__m128i)in,n); };
}; };
////////////////////////////////////////////// //////////////////////////////////////////////