mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Update to use Xcode 8.3 since -mfp16 causes SIGILL
This commit is contained in:
parent
2846f079e5
commit
db5ea001a3
@ -7,7 +7,7 @@ cache:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- os: osx
|
- os: osx
|
||||||
osx_image: xcode7.2
|
osx_image: xcode8.3
|
||||||
compiler: clang
|
compiler: clang
|
||||||
- compiler: gcc
|
- compiler: gcc
|
||||||
addons:
|
addons:
|
||||||
|
@ -86,7 +86,7 @@ esac
|
|||||||
############### FP16 conversions
|
############### FP16 conversions
|
||||||
AC_ARG_ENABLE([fp16],
|
AC_ARG_ENABLE([fp16],
|
||||||
[AC_HELP_STRING([--enable-fp16=yes|no], [enable fp16 comms])],
|
[AC_HELP_STRING([--enable-fp16=yes|no], [enable fp16 comms])],
|
||||||
[ac_FP16=${enable_fp16}], [ac_FP16=no])
|
[ac_FP16=${enable_fp16}], [ac_FP16=yes])
|
||||||
case ${ac_FP16} in
|
case ${ac_FP16} in
|
||||||
no)
|
no)
|
||||||
;;
|
;;
|
||||||
@ -189,8 +189,7 @@ case ${ax_cv_cxx_compiler_vendor} in
|
|||||||
case ${ac_SIMD} in
|
case ${ac_SIMD} in
|
||||||
SSE4)
|
SSE4)
|
||||||
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
# SIMD_FLAGS='-msse4.2 -mf16c';;
|
SIMD_FLAGS='-msse4.2 -mf16c';;
|
||||||
SIMD_FLAGS='-msse4.2';;
|
|
||||||
AVX)
|
AVX)
|
||||||
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
SIMD_FLAGS='-mavx -mf16c';;
|
SIMD_FLAGS='-mavx -mf16c';;
|
||||||
|
@ -330,10 +330,8 @@ namespace Optimization {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#ifndef _mm_alignr_epi64
|
#define _my_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16)
|
||||||
#define _mm_alignr_epi32(a,b,n) _mm_alignr_epi8(a,b,(n*4)%16)
|
#define _my_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16)
|
||||||
#define _mm_alignr_epi64(a,b,n) _mm_alignr_epi8(a,b,(n*8)%16)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct PrecisionChange {
|
struct PrecisionChange {
|
||||||
static inline __m128i StoH (__m128 a,__m128 b) {
|
static inline __m128i StoH (__m128 a,__m128 b) {
|
||||||
@ -350,7 +348,7 @@ namespace Optimization {
|
|||||||
static inline void HtoS (__m128i h,__m128 &sa,__m128 &sb) {
|
static inline void HtoS (__m128i h,__m128 &sa,__m128 &sb) {
|
||||||
#ifdef USE_FP16
|
#ifdef USE_FP16
|
||||||
sa = _mm_cvtph_ps(h);
|
sa = _mm_cvtph_ps(h);
|
||||||
h = (__m128i)_mm_alignr_epi32((__m128i)h,(__m128i)h,2);
|
h = (__m128i)_my_alignr_epi32((__m128i)h,(__m128i)h,2);
|
||||||
sb = _mm_cvtph_ps(h);
|
sb = _mm_cvtph_ps(h);
|
||||||
#else
|
#else
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -364,7 +362,7 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
static inline void StoD (__m128 s,__m128d &a,__m128d &b) {
|
static inline void StoD (__m128 s,__m128d &a,__m128d &b) {
|
||||||
a = _mm_cvtps_pd(s);
|
a = _mm_cvtps_pd(s);
|
||||||
s = (__m128)_mm_alignr_epi32((__m128i)s,(__m128i)s,2);
|
s = (__m128)_my_alignr_epi32((__m128i)s,(__m128i)s,2);
|
||||||
b = _mm_cvtps_pd(s);
|
b = _mm_cvtps_pd(s);
|
||||||
}
|
}
|
||||||
static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) {
|
static inline __m128i DtoH (__m128d a,__m128d b,__m128d c,__m128d d) {
|
||||||
@ -439,8 +437,8 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int n> static inline __m128 tRotate(__m128 in){ return (__m128)_mm_alignr_epi32((__m128i)in,(__m128i)in,n); };
|
template<int n> static inline __m128 tRotate(__m128 in){ return (__m128)_my_alignr_epi32((__m128i)in,(__m128i)in,n); };
|
||||||
template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_mm_alignr_epi64((__m128i)in,(__m128i)in,n); };
|
template<int n> static inline __m128d tRotate(__m128d in){ return (__m128d)_my_alignr_epi64((__m128i)in,(__m128i)in,n); };
|
||||||
|
|
||||||
};
|
};
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
Loading…
Reference in New Issue
Block a user