1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Check at configure time if CPU supports the requested SIMD optimization

This commit is contained in:
neo 2015-05-27 18:30:11 +09:00
parent 9fd6506d1f
commit d8b05e001d
11 changed files with 2475 additions and 14 deletions

31
.gitignore vendored
View File

@ -5,6 +5,7 @@
*.obj
*~
errs
*#
# Precompiled Headers
*.gch
@ -48,3 +49,33 @@ config.status
/stamp-h1
/config.sub
/config.guess
# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
# Logs and databases #
######################
*.log
*.sql
*.sqlite
# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

View File

@ -20,7 +20,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
for most programmers.
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
These are presented as
@ -46,3 +46,5 @@ are examples:
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
For developers:
Use reconfigure_script in the scripts/ directory to create the autotools environment

1872
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,7 @@
#
# Project Grid package
#
# Time-stamp: <2015-05-26 17:18:54 neo>
# Time-stamp: <2015-05-27 18:29:04 neo>
AC_PREREQ([2.63])
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
@ -27,7 +27,7 @@ AC_PROG_CXX
AC_OPENMP
AC_PROG_RANLIB
AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
AX_EXT
# Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)
@ -69,26 +69,44 @@ Info at: http://www.mpfr.org/)])
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE|AVX|AVX2|AVX512|MIC],\
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2])
supported=no
case ${ac_SIMD} in
SSE4)
echo Configuring for SSE4
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
AC_DEFINE([SSE4],[1],[SSE4] )
supported=yes
else
AC_MSG_WARN([Your processor does not support SSE4 instructions])
fi
;;
AVX)
echo Configuring for AVX
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
AC_DEFINE([AVX1],[1],[AVX] )
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVX2)
echo Configuring for AVX2
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
AC_DEFINE([AVX2],[1],[AVX2] )
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX2 instructions])
fi
;;
AVX512|MIC)
echo Configuring for AVX512 and MIC
AC_DEFINE([AVX512],[1],[AVX512] )
supported=yes
;;
*)
AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]);
@ -129,7 +147,9 @@ then
AC_CONFIG_FILES([docs/doxy.cfg])
fi
echo
echo Creating configuration files
echo :::::::::::::::::::::::::::::::::::::::::::
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@ -150,9 +170,9 @@ The following features are enabled:
- os (target) : $target_os
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
- Supported SIMD flags : $SIMD_FLAGS
----------------------------------------------------------
- enabled simd support : ${ac_SIMD}
- enabled simd support : ${ac_SIMD} (supported: $supported )
- communications type : ${ac_COMMS}

View File

@ -16,6 +16,15 @@
/* GRID_COMMS_NONE */
#define GRID_COMMS_NONE 1
/* Support Altivec instructions */
/* #undef HAVE_ALTIVEC */
/* Support AVX (Advanced Vector Extensions) instructions */
/* #undef HAVE_AVX */
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
/* #undef HAVE_AVX2 */
/* define if the compiler supports basic C++11 syntax */
/* #undef HAVE_CXX11 */
@ -30,6 +39,9 @@
/* Define to 1 if you have the <endian.h> header file. */
#define HAVE_ENDIAN_H 1
/* Support FMA3 (Fused Multiply-Add) instructions */
/* #undef HAVE_FMA */
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
@ -54,9 +66,30 @@
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Support mmx instructions */
#define HAVE_MMX /**/
/* Define to 1 if you have the <mm_malloc.h> header file. */
#define HAVE_MM_MALLOC_H 1
/* Support SSE (Streaming SIMD Extensions) instructions */
#define HAVE_SSE /**/
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
#define HAVE_SSE2 /**/
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
#define HAVE_SSE3 /**/
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
#define HAVE_SSE4_1 /**/
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
#define HAVE_SSE4_2 /**/
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
#define HAVE_SSSE3 /**/
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

View File

@ -15,6 +15,15 @@
/* GRID_COMMS_NONE */
#undef GRID_COMMS_NONE
/* Support Altivec instructions */
#undef HAVE_ALTIVEC
/* Support AVX (Advanced Vector Extensions) instructions */
#undef HAVE_AVX
/* Support AVX2 (Advanced Vector Extensions 2) instructions */
#undef HAVE_AVX2
/* define if the compiler supports basic C++11 syntax */
#undef HAVE_CXX11
@ -29,6 +38,9 @@
/* Define to 1 if you have the <endian.h> header file. */
#undef HAVE_ENDIAN_H
/* Support FMA3 (Fused Multiply-Add) instructions */
#undef HAVE_FMA
/* Define to 1 if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
@ -53,9 +65,30 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Support mmx instructions */
#undef HAVE_MMX
/* Define to 1 if you have the <mm_malloc.h> header file. */
#undef HAVE_MM_MALLOC_H
/* Support SSE (Streaming SIMD Extensions) instructions */
#undef HAVE_SSE
/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
#undef HAVE_SSE2
/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
#undef HAVE_SSE3
/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
#undef HAVE_SSE4_1
/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
#undef HAVE_SSE4_2
/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
#undef HAVE_SSSE3
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H

View File

@ -0,0 +1,72 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 2
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAGS

288
m4/ax_ext.m4 Normal file
View File

@ -0,0 +1,288 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_EXT
#
# DESCRIPTION
#
# Find supported SIMD extensions by requesting cpuid. When an SIMD
# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
# compiler supports it. For example, if "sse2" is available, then "-msse2"
# is added to SIMD_FLAGS.
#
# This macro calls:
#
# AC_SUBST(SIMD_FLAGS)
#
# And defines:
#
# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
#
# LICENSE
#
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 13
AC_DEFUN([AX_EXT],
[
AC_REQUIRE([AC_CANONICAL_HOST])
case $host_cpu in
powerpc*)
AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
[
if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
ax_cv_have_altivec_ext=yes
fi
fi
])
if test "$ax_cv_have_altivec_ext" = yes; then
AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], [])
fi
;;
i[[3456]]86*|x86_64*|amd64*)
AC_REQUIRE([AX_GCC_X86_CPUID])
AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
AX_GCC_X86_CPUID(0x00000001)
ecx=0
edx=0
ebx=0
if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
then
ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
fi
AX_GCC_X86_CPUID(0x00000007)
if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
then
ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
fi
AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
[
ax_cv_have_mmx_ext=no
if test "$((0x$edx>>23&0x01))" = 1; then
ax_cv_have_mmx_ext=yes
fi
])
AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
[
ax_cv_have_sse_ext=no
if test "$((0x$edx>>25&0x01))" = 1; then
ax_cv_have_sse_ext=yes
fi
])
AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
[
ax_cv_have_sse2_ext=no
if test "$((0x$edx>>26&0x01))" = 1; then
ax_cv_have_sse2_ext=yes
fi
])
AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
[
ax_cv_have_sse3_ext=no
if test "$((0x$ecx&0x01))" = 1; then
ax_cv_have_sse3_ext=yes
fi
])
AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
[
ax_cv_have_ssse3_ext=no
if test "$((0x$ecx>>9&0x01))" = 1; then
ax_cv_have_ssse3_ext=yes
fi
])
AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
[
ax_cv_have_sse41_ext=no
if test "$((0x$ecx>>19&0x01))" = 1; then
ax_cv_have_sse41_ext=yes
fi
])
AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
[
ax_cv_have_sse42_ext=no
if test "$((0x$ecx>>20&0x01))" = 1; then
ax_cv_have_sse42_ext=yes
fi
])
AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
[
ax_cv_have_avx_cpu_ext=no
if test "$((0x$ecx>>28&0x01))" = 1; then
ax_cv_have_avx_cpu_ext=yes
fi
])
AC_CACHE_CHECK([whether avx2 is supported by processor], [ax_cv_have_avx2_cpu_ext],
[
ax_cv_have_avx2_cpu_ext=no
if test "$((0x$ebx>>5&0x01))" = 1; then
ax_cv_have_avx2_cpu_ext=yes
fi
])
AC_CACHE_CHECK([whether fma is supported by processor], [ax_cv_have_fma_cpu_ext],
[
ax_cv_have_fma_cpu_ext=no
if test "$((0x$ecx>>12&0x01))" = 1; then
ax_cv_have_fma_cpu_ext=yes
fi
])
if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
AX_GCC_X86_AVX_XGETBV(0x00000000)
xgetbv_eax="0"
if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
fi
AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
[
ax_cv_have_avx_ext=no
if test "$((0x$ecx>>27&0x01))" = 1; then
if test "$((0x$xgetbv_eax&0x6))" = 6; then
ax_cv_have_avx_ext=yes
fi
fi
])
if test x"$ax_cv_have_avx_ext" = x"no"; then
AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
fi
fi
if test "$ax_cv_have_mmx_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
if test x"$ax_cv_support_mmx_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mmmx"
AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
else
AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
if test x"$ax_cv_support_sse_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse"
AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
else
AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse2_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
if test x"$ax_cv_support_sse2_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse2"
AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
else
AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse3_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
if test x"$ax_cv_support_sse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse3"
AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
else
AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_ssse3_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mssse3"
AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
else
AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse41_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
if test x"$ax_cv_support_sse41_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
else
AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_sse42_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
if test x"$ax_cv_support_sse42_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
else
AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_avx_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
if test x"$ax_cv_support_avx_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mavx"
AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
else
AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_avx2_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mavx2, ax_cv_support_avx2_ext=yes, [])
if test x"$ax_cv_support_avx2_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mavx2"
AC_DEFINE(HAVE_AVX2,,[Support AVX2 (Advanced Vector Extensions 2) instructions])
else
AC_MSG_WARN([Your processor supports avx2 instructions but not your compiler, can you try another compiler?])
fi
fi
if test "$ax_cv_have_fma_ext" = yes; then
AX_CHECK_COMPILE_FLAG(-mfma, ax_cv_support_fma_ext=yes, [])
if test x"$ax_cv_support_fma_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mfma"
AC_DEFINE(HAVE_FMA,,[Support FMA3 (Fused Multiply-Add) instructions])
else
AC_MSG_WARN([Your processor supports fma instructions but not your compiler, can you try another compiler?])
fi
fi
;;
esac
AC_SUBST(SIMD_FLAGS)
])

View File

@ -0,0 +1,79 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_AVX_XGETBV
#
# DESCRIPTION
#
# On later x86 processors with AVX SIMD support, with gcc or a compiler
# that has a compatible syntax for inline assembly instructions, run a
# small program that executes the xgetbv instruction with input OP. This
# can be used to detect if the OS supports AVX instruction usage.
#
# On output, the values of the eax and edx registers are stored as
# hexadecimal strings as "eax:edx" in the cache variable
# ax_cv_gcc_x86_avx_xgetbv.
#
# If the xgetbv instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction),
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
#
# This macro mainly exists to be used in AX_EXT.
#
# LICENSE
#
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 1
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, edx;
FILE *f;
/* Opcodes for xgetbv */
__asm__(".byte 0x0f, 0x01, 0xd0"
: "=a" (eax), "=d" (edx),
: "c" (op));
f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
fprintf(f, "%x:%x\n", eax, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])

45
m4/ax_gcc_x86_cpuid.m4 Normal file
View File

@ -0,0 +1,45 @@
dnl @synopsis AX_GCC_X86_CPUID(OP)
dnl
dnl @summary run x86 cpuid instruction OP using gcc inline assembler
dnl
dnl On Pentium and later x86 processors, with gcc or a compiler that
dnl has a compatible syntax for inline assembly instructions, run a
dnl small program that executes the cpuid instruction with input OP.
dnl This can be used to detect the CPU type.
dnl
dnl On output, the values of the eax, ebx, ecx, and edx registers are
dnl stored as hexadecimal strings as "eax:ebx:ecx:edx" in the cache
dnl variable ax_cv_gcc_x86_cpuid_OP.
dnl
dnl If the cpuid instruction fails (because you are running a
dnl cross-compiler, or because you are not using gcc, or because you
dnl are on a processor that doesn't have this instruction),
dnl ax_cv_gcc_x86_cpuid_OP is set to the string "unknown".
dnl
dnl This macro mainly exists to be used in AX_GCC_ARCHFLAG.
dnl
dnl @category Misc
dnl @author Steven G. Johnson <stevenj@alum.mit.edu> and Matteo Frigo.
dnl @version 2005-05-30
dnl @license GPLWithACException
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, ebx, ecx, edx;
FILE *f;
__asm__("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (op));
f = fopen("conftest_cpuid", "w"); if (!f) return 1;
fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])