diff --git a/.travis.yml b/.travis.yml index 107343ed..20716334 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ cache: matrix: include: - os: osx - osx_image: xcode7.2 + osx_image: xcode8.3 compiler: clang - compiler: gcc addons: diff --git a/configure.ac b/configure.ac index e0820d79..6b233c0d 100644 --- a/configure.ac +++ b/configure.ac @@ -86,7 +86,7 @@ esac ############### FP16 conversions AC_ARG_ENABLE([fp16], [AC_HELP_STRING([--enable-fp16=yes|no], [enable fp16 comms])], - [ac_FP16=${enable_fp16}], [ac_FP16=no]) + [ac_FP16=${enable_fp16}], [ac_FP16=yes]) case ${ac_FP16} in no) ;; @@ -189,8 +189,7 @@ case ${ax_cv_cxx_compiler_vendor} in case ${ac_SIMD} in SSE4) AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) -# SIMD_FLAGS='-msse4.2 -mf16c';; - SIMD_FLAGS='-msse4.2';; + SIMD_FLAGS='-msse4.2 -mf16c';; AVX) AC_DEFINE([AVX1],[1],[AVX intrinsics]) SIMD_FLAGS='-mavx -mf16c';; diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 969ba3ed..8a4537c2 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -330,10 +330,8 @@ namespace Optimization { }; -#ifndef _mm_alignr_epi64 -#define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16) -#define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16) -#endif +#define _my_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16) +#define _my_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16) struct PrecisionChange { static inline __m128i StoH (__m128 a,__m128 b) { @@ -350,7 +348,7 @@ namespace Optimization { static inline void HtoS (__m128i h,__m128 &sa,__m128 &sb) { #ifdef USE_FP16 sa = _mm_cvtph_ps(h); - h = (__m128i)_mm_alignr_epi32((__m128i)h,(__m128i)h,2); + h = (__m128i)_my_alignr_epi32((__m128i)h,(__m128i)h,2); sb = _mm_cvtph_ps(h); #else assert(0); @@ -364,7 +362,7 @@ namespace Optimization { } static inline void StoD (__m128 s,__m128d &a,__m128d &b) { a = _mm_cvtps_pd(s); - s = (__m128)_mm_alignr_epi32((__m128i)s,(__m128i)s,2); + s = (__m128)_my_alignr_epi32((__m128i)s,(__m128i)s,2); b = _mm_cvtps_pd(s); } static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) { @@ -439,8 +437,8 @@ namespace Optimization { } } - template static inline __m128 tRotate(__m128 in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); }; - template static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); }; + template static inline __m128 tRotate(__m128 in){ return (__m128)_my_alignr_epi32((__m128i)in,(__m128i)in,n); }; + template static inline __m128d tRotate(__m128d in){ return (__m128d)_my_alignr_epi64((__m128i)in,(__m128i)in,n); }; }; //////////////////////////////////////////////