mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Check at configure time if CPU supports the requested SIMD optimization
This commit is contained in:
parent
64753ea633
commit
19bd6f103a
31
.gitignore
vendored
31
.gitignore
vendored
@ -5,6 +5,7 @@
|
||||
*.obj
|
||||
*~
|
||||
errs
|
||||
*#
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
@ -48,3 +49,33 @@ config.status
|
||||
/stamp-h1
|
||||
/config.sub
|
||||
/config.guess
|
||||
|
||||
|
||||
# Packages #
|
||||
############
|
||||
# it's better to unpack these files and commit the raw source
|
||||
# git has its own built in compression methods
|
||||
*.7z
|
||||
*.dmg
|
||||
*.gz
|
||||
*.iso
|
||||
*.jar
|
||||
*.rar
|
||||
*.tar
|
||||
*.zip
|
||||
|
||||
# Logs and databases #
|
||||
######################
|
||||
*.log
|
||||
*.sql
|
||||
*.sqlite
|
||||
|
||||
# OS generated files #
|
||||
######################
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
@ -20,7 +20,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
||||
for most programmers.
|
||||
|
||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
|
||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
|
||||
|
||||
These are presented as
|
||||
|
||||
@ -46,3 +46,5 @@ are examples:
|
||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
||||
|
||||
|
||||
For developers:
|
||||
Use reconfigure_script in the scripts/ directory to create the autotools environment
|
||||
|
32
configure.ac
32
configure.ac
@ -3,7 +3,7 @@
|
||||
#
|
||||
# Project Grid package
|
||||
#
|
||||
# Time-stamp: <2015-05-26 17:18:54 neo>
|
||||
# Time-stamp: <2015-05-27 18:29:04 neo>
|
||||
|
||||
AC_PREREQ([2.63])
|
||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
||||
@ -27,7 +27,7 @@ AC_PROG_CXX
|
||||
AC_OPENMP
|
||||
AC_PROG_RANLIB
|
||||
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
||||
|
||||
AX_EXT
|
||||
|
||||
# Checks for libraries.
|
||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
||||
@ -69,26 +69,44 @@ Info at: http://www.mpfr.org/)])
|
||||
|
||||
|
||||
|
||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE|AVX|AVX2|AVX512|MIC],\
|
||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
|
||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
||||
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
|
||||
|
||||
supported=no
|
||||
|
||||
case ${ac_SIMD} in
|
||||
SSE4)
|
||||
echo Configuring for SSE4
|
||||
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
|
||||
AC_DEFINE([SSE4],[1],[SSE4] )
|
||||
supported=yes
|
||||
else
|
||||
AC_MSG_WARN([Your processor does not support SSE4 instructions])
|
||||
fi
|
||||
;;
|
||||
AVX)
|
||||
echo Configuring for AVX
|
||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
||||
AC_DEFINE([AVX1],[1],[AVX] )
|
||||
supported=yes
|
||||
else
|
||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
||||
fi
|
||||
;;
|
||||
AVX2)
|
||||
echo Configuring for AVX2
|
||||
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
|
||||
AC_DEFINE([AVX2],[1],[AVX2] )
|
||||
supported=yes
|
||||
else
|
||||
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
||||
fi
|
||||
;;
|
||||
AVX512|MIC)
|
||||
echo Configuring for AVX512 and MIC
|
||||
AC_DEFINE([AVX512],[1],[AVX512] )
|
||||
supported=yes
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]);
|
||||
@ -129,7 +147,9 @@ then
|
||||
AC_CONFIG_FILES([docs/doxy.cfg])
|
||||
fi
|
||||
|
||||
|
||||
echo
|
||||
echo Creating configuration files
|
||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
||||
AC_CONFIG_FILES(Makefile)
|
||||
AC_CONFIG_FILES(lib/Makefile)
|
||||
AC_CONFIG_FILES(tests/Makefile)
|
||||
@ -150,9 +170,9 @@ The following features are enabled:
|
||||
- os (target) : $target_os
|
||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||
|
||||
- Supported SIMD flags : $SIMD_FLAGS
|
||||
----------------------------------------------------------
|
||||
- enabled simd support : ${ac_SIMD}
|
||||
- enabled simd support : ${ac_SIMD} (supported: $supported )
|
||||
- communications type : ${ac_COMMS}
|
||||
|
||||
|
||||
|
@ -16,6 +16,15 @@
|
||||
/* GRID_COMMS_NONE */
|
||||
#define GRID_COMMS_NONE 1
|
||||
|
||||
/* Support Altivec instructions */
|
||||
/* #undef HAVE_ALTIVEC */
|
||||
|
||||
/* Support AVX (Advanced Vector Extensions) instructions */
|
||||
/* #undef HAVE_AVX */
|
||||
|
||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
||||
/* #undef HAVE_AVX2 */
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
/* #undef HAVE_CXX11 */
|
||||
|
||||
@ -30,6 +39,9 @@
|
||||
/* Define to 1 if you have the <endian.h> header file. */
|
||||
#define HAVE_ENDIAN_H 1
|
||||
|
||||
/* Support FMA3 (Fused Multiply-Add) instructions */
|
||||
/* #undef HAVE_FMA */
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#define HAVE_GETTIMEOFDAY 1
|
||||
|
||||
@ -54,9 +66,30 @@
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Support mmx instructions */
|
||||
#define HAVE_MMX /**/
|
||||
|
||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
||||
#define HAVE_MM_MALLOC_H 1
|
||||
|
||||
/* Support SSE (Streaming SIMD Extensions) instructions */
|
||||
#define HAVE_SSE /**/
|
||||
|
||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
|
||||
#define HAVE_SSE2 /**/
|
||||
|
||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
|
||||
#define HAVE_SSE3 /**/
|
||||
|
||||
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
|
||||
#define HAVE_SSE4_1 /**/
|
||||
|
||||
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
|
||||
#define HAVE_SSE4_2 /**/
|
||||
|
||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
|
||||
#define HAVE_SSSE3 /**/
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
|
@ -15,6 +15,15 @@
|
||||
/* GRID_COMMS_NONE */
|
||||
#undef GRID_COMMS_NONE
|
||||
|
||||
/* Support Altivec instructions */
|
||||
#undef HAVE_ALTIVEC
|
||||
|
||||
/* Support AVX (Advanced Vector Extensions) instructions */
|
||||
#undef HAVE_AVX
|
||||
|
||||
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
|
||||
#undef HAVE_AVX2
|
||||
|
||||
/* define if the compiler supports basic C++11 syntax */
|
||||
#undef HAVE_CXX11
|
||||
|
||||
@ -29,6 +38,9 @@
|
||||
/* Define to 1 if you have the <endian.h> header file. */
|
||||
#undef HAVE_ENDIAN_H
|
||||
|
||||
/* Support FMA3 (Fused Multiply-Add) instructions */
|
||||
#undef HAVE_FMA
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#undef HAVE_GETTIMEOFDAY
|
||||
|
||||
@ -53,9 +65,30 @@
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Support mmx instructions */
|
||||
#undef HAVE_MMX
|
||||
|
||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
|
||||
#undef HAVE_MM_MALLOC_H
|
||||
|
||||
/* Support SSE (Streaming SIMD Extensions) instructions */
|
||||
#undef HAVE_SSE
|
||||
|
||||
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
|
||||
#undef HAVE_SSE2
|
||||
|
||||
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
|
||||
#undef HAVE_SSE3
|
||||
|
||||
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
|
||||
#undef HAVE_SSE4_1
|
||||
|
||||
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
|
||||
#undef HAVE_SSE4_2
|
||||
|
||||
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
|
||||
#undef HAVE_SSSE3
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
|
72
m4/ax_check_compile_flag.m4
Normal file
72
m4/ax_check_compile_flag.m4
Normal file
@ -0,0 +1,72 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Check whether the given FLAG works with the current language's compiler
|
||||
# or gives an error. (Warnings, however, are ignored)
|
||||
#
|
||||
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
|
||||
# success/failure.
|
||||
#
|
||||
# If EXTRA-FLAGS is defined, it is added to the current language's default
|
||||
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
|
||||
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
|
||||
# force the compiler to issue an error when a bad flag is given.
|
||||
#
|
||||
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
|
||||
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
|
||||
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation, either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 2
|
||||
|
||||
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
|
||||
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
|
||||
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
|
||||
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
|
||||
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
|
||||
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
|
||||
[AS_VAR_SET(CACHEVAR,[yes])],
|
||||
[AS_VAR_SET(CACHEVAR,[no])])
|
||||
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
|
||||
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
|
||||
[m4_default([$2], :)],
|
||||
[m4_default([$3], :)])
|
||||
AS_VAR_POPDEF([CACHEVAR])dnl
|
||||
])dnl AX_CHECK_COMPILE_FLAGS
|
288
m4/ax_ext.m4
Normal file
288
m4/ax_ext.m4
Normal file
@ -0,0 +1,288 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_EXT
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Find supported SIMD extensions by requesting cpuid. When an SIMD
|
||||
# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
|
||||
# compiler supports it. For example, if "sse2" is available, then "-msse2"
|
||||
# is added to SIMD_FLAGS.
|
||||
#
|
||||
# This macro calls:
|
||||
#
|
||||
# AC_SUBST(SIMD_FLAGS)
|
||||
#
|
||||
# And defines:
|
||||
#
|
||||
# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
|
||||
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification, are
|
||||
# permitted in any medium without royalty provided the copyright notice
|
||||
# and this notice are preserved. This file is offered as-is, without any
|
||||
# warranty.
|
||||
|
||||
#serial 13
|
||||
|
||||
AC_DEFUN([AX_EXT],
|
||||
[
|
||||
AC_REQUIRE([AC_CANONICAL_HOST])
|
||||
|
||||
case $host_cpu in
|
||||
powerpc*)
|
||||
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
|
||||
[
|
||||
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
|
||||
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
|
||||
ax_cv_have_altivec_ext=yes
|
||||
fi
|
||||
fi
|
||||
])
|
||||
|
||||
if test "$ax_cv_have_altivec_ext" = yes; then
|
||||
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
|
||||
AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [])
|
||||
fi
|
||||
;;
|
||||
|
||||
|
||||
i[[3456]]86*|x86_64*|amd64*)
|
||||
|
||||
AC_REQUIRE([AX_GCC_X86_CPUID])
|
||||
AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
|
||||
|
||||
AX_GCC_X86_CPUID(0x00000001)
|
||||
ecx=0
|
||||
edx=0
|
||||
ebx=0
|
||||
if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
|
||||
then
|
||||
ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
|
||||
edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
|
||||
fi
|
||||
|
||||
AX_GCC_X86_CPUID(0x00000007)
|
||||
if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
|
||||
then
|
||||
ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
|
||||
fi
|
||||
|
||||
AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
|
||||
[
|
||||
ax_cv_have_mmx_ext=no
|
||||
if test "$((0x$edx>>23&0x01))" = 1; then
|
||||
ax_cv_have_mmx_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
|
||||
[
|
||||
ax_cv_have_sse_ext=no
|
||||
if test "$((0x$edx>>25&0x01))" = 1; then
|
||||
ax_cv_have_sse_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
|
||||
[
|
||||
ax_cv_have_sse2_ext=no
|
||||
if test "$((0x$edx>>26&0x01))" = 1; then
|
||||
ax_cv_have_sse2_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
|
||||
[
|
||||
ax_cv_have_sse3_ext=no
|
||||
if test "$((0x$ecx&0x01))" = 1; then
|
||||
ax_cv_have_sse3_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
|
||||
[
|
||||
ax_cv_have_ssse3_ext=no
|
||||
if test "$((0x$ecx>>9&0x01))" = 1; then
|
||||
ax_cv_have_ssse3_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
|
||||
[
|
||||
ax_cv_have_sse41_ext=no
|
||||
if test "$((0x$ecx>>19&0x01))" = 1; then
|
||||
ax_cv_have_sse41_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
|
||||
[
|
||||
ax_cv_have_sse42_ext=no
|
||||
if test "$((0x$ecx>>20&0x01))" = 1; then
|
||||
ax_cv_have_sse42_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
|
||||
[
|
||||
ax_cv_have_avx_cpu_ext=no
|
||||
if test "$((0x$ecx>>28&0x01))" = 1; then
|
||||
ax_cv_have_avx_cpu_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
AC_CACHE_CHECK([whether avx2 is supported by processor], [ax_cv_have_avx2_cpu_ext],
|
||||
[
|
||||
ax_cv_have_avx2_cpu_ext=no
|
||||
if test "$((0x$ebx>>5&0x01))" = 1; then
|
||||
ax_cv_have_avx2_cpu_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
|
||||
AC_CACHE_CHECK([whether fma is supported by processor], [ax_cv_have_fma_cpu_ext],
|
||||
[
|
||||
ax_cv_have_fma_cpu_ext=no
|
||||
if test "$((0x$ecx>>12&0x01))" = 1; then
|
||||
ax_cv_have_fma_cpu_ext=yes
|
||||
fi
|
||||
])
|
||||
|
||||
|
||||
if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
|
||||
AX_GCC_X86_AVX_XGETBV(0x00000000)
|
||||
|
||||
xgetbv_eax="0"
|
||||
if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
|
||||
xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
|
||||
fi
|
||||
|
||||
AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
|
||||
[
|
||||
ax_cv_have_avx_ext=no
|
||||
|
||||
if test "$((0x$ecx>>27&0x01))" = 1; then
|
||||
if test "$((0x$xgetbv_eax&0x6))" = 6; then
|
||||
ax_cv_have_avx_ext=yes
|
||||
fi
|
||||
fi
|
||||
])
|
||||
if test x"$ax_cv_have_avx_ext" = x"no"; then
|
||||
AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_mmx_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
|
||||
if test x"$ax_cv_support_mmx_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -mmmx"
|
||||
AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_sse_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
|
||||
if test x"$ax_cv_support_sse_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -msse"
|
||||
AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_sse2_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
|
||||
if test x"$ax_cv_support_sse2_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -msse2"
|
||||
AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_sse3_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
|
||||
if test x"$ax_cv_support_sse3_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -msse3"
|
||||
AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_ssse3_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
|
||||
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -mssse3"
|
||||
AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_sse41_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
|
||||
if test x"$ax_cv_support_sse41_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
|
||||
AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_sse42_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
|
||||
if test x"$ax_cv_support_sse42_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
|
||||
AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_avx_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
|
||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -mavx"
|
||||
AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_avx2_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-mavx2, ax_cv_support_avx2_ext=yes, [])
|
||||
if test x"$ax_cv_support_avx2_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -mavx2"
|
||||
AC_DEFINE(HAVE_AVX2,,[Support AVX2 (Advanced Vector Extensions 2) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports avx2 instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$ax_cv_have_fma_ext" = yes; then
|
||||
AX_CHECK_COMPILE_FLAG(-mfma, ax_cv_support_fma_ext=yes, [])
|
||||
if test x"$ax_cv_support_fma_ext" = x"yes"; then
|
||||
SIMD_FLAGS="$SIMD_FLAGS -mfma"
|
||||
AC_DEFINE(HAVE_FMA,,[Support FMA3 (Fused Multiply-Add) instructions])
|
||||
else
|
||||
AC_MSG_WARN([Your processor supports fma instructions but not your compiler, can you try another compiler?])
|
||||
fi
|
||||
fi
|
||||
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_SUBST(SIMD_FLAGS)
|
||||
])
|
79
m4/ax_gcc_x86_avx_xgetbv.m4
Normal file
79
m4/ax_gcc_x86_avx_xgetbv.m4
Normal file
@ -0,0 +1,79 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_GCC_X86_AVX_XGETBV
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# On later x86 processors with AVX SIMD support, with gcc or a compiler
|
||||
# that has a compatible syntax for inline assembly instructions, run a
|
||||
# small program that executes the xgetbv instruction with input OP. This
|
||||
# can be used to detect if the OS supports AVX instruction usage.
|
||||
#
|
||||
# On output, the values of the eax and edx registers are stored as
|
||||
# hexadecimal strings as "eax:edx" in the cache variable
|
||||
# ax_cv_gcc_x86_avx_xgetbv.
|
||||
#
|
||||
# If the xgetbv instruction fails (because you are running a
|
||||
# cross-compiler, or because you are not using gcc, or because you are on
|
||||
# a processor that doesn't have this instruction),
|
||||
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
|
||||
#
|
||||
# This macro mainly exists to be used in AX_EXT.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation, either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 1
|
||||
|
||||
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
|
||||
[AC_REQUIRE([AC_PROG_CC])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
|
||||
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
|
||||
int op = $1, eax, edx;
|
||||
FILE *f;
|
||||
/* Opcodes for xgetbv */
|
||||
__asm__(".byte 0x0f, 0x01, 0xd0"
|
||||
: "=a" (eax), "=d" (edx),
|
||||
: "c" (op));
|
||||
f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
|
||||
fprintf(f, "%x:%x\n", eax, edx);
|
||||
fclose(f);
|
||||
return 0;
|
||||
])],
|
||||
[ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
|
||||
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
|
||||
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
|
||||
AC_LANG_POP([C])
|
||||
])
|
45
m4/ax_gcc_x86_cpuid.m4
Normal file
45
m4/ax_gcc_x86_cpuid.m4
Normal file
@ -0,0 +1,45 @@
|
||||
dnl @synopsis AX_GCC_X86_CPUID(OP)
|
||||
dnl
|
||||
dnl @summary run x86 cpuid instruction OP using gcc inline assembler
|
||||
dnl
|
||||
dnl On Pentium and later x86 processors, with gcc or a compiler that
|
||||
dnl has a compatible syntax for inline assembly instructions, run a
|
||||
dnl small program that executes the cpuid instruction with input OP.
|
||||
dnl This can be used to detect the CPU type.
|
||||
dnl
|
||||
dnl On output, the values of the eax, ebx, ecx, and edx registers are
|
||||
dnl stored as hexadecimal strings as "eax:ebx:ecx:edx" in the cache
|
||||
dnl variable ax_cv_gcc_x86_cpuid_OP.
|
||||
dnl
|
||||
dnl If the cpuid instruction fails (because you are running a
|
||||
dnl cross-compiler, or because you are not using gcc, or because you
|
||||
dnl are on a processor that doesn't have this instruction),
|
||||
dnl ax_cv_gcc_x86_cpuid_OP is set to the string "unknown".
|
||||
dnl
|
||||
dnl This macro mainly exists to be used in AX_GCC_ARCHFLAG.
|
||||
dnl
|
||||
dnl @category Misc
|
||||
dnl @author Steven G. Johnson <stevenj@alum.mit.edu> and Matteo Frigo.
|
||||
dnl @version 2005-05-30
|
||||
dnl @license GPLWithACException
|
||||
|
||||
AC_DEFUN([AX_GCC_X86_CPUID],
|
||||
[AC_REQUIRE([AC_PROG_CC])
|
||||
AC_LANG_PUSH([C])
|
||||
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
|
||||
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
|
||||
int op = $1, eax, ebx, ecx, edx;
|
||||
FILE *f;
|
||||
__asm__("cpuid"
|
||||
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
|
||||
: "a" (op));
|
||||
f = fopen("conftest_cpuid", "w"); if (!f) return 1;
|
||||
fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
|
||||
fclose(f);
|
||||
return 0;
|
||||
])],
|
||||
[ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
|
||||
[ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
|
||||
[ax_cv_gcc_x86_cpuid_$1=unknown])])
|
||||
AC_LANG_POP([C])
|
||||
])
|
Loading…
Reference in New Issue
Block a user