From 644fd6d32e15d832712041f478eaea8739481fa7 Mon Sep 17 00:00:00 2001 From: paboyle Date: Fri, 25 Mar 2016 09:35:33 -0700 Subject: [PATCH 1/7] Build avx512 clean --- configure | 2293 +------------------- configure.ac | 2 +- lib/Config.h.in | 33 - lib/qcd/action/fermion/WilsonKernels.cc | 2 +- lib/qcd/action/fermion/WilsonKernelsAsm.cc | 3 +- lib/simd/Grid_avx512.h | 4 +- 6 files changed, 8 insertions(+), 2329 deletions(-) diff --git a/configure b/configure index ec22b43c..cbc70b99 100755 --- a/configure +++ b/configure @@ -656,13 +656,6 @@ BUILD_ZMM_TRUE EGREP GREP CXXCPP -SIMD_FLAGS -am__fastdepCC_FALSE -am__fastdepCC_TRUE -CCDEPMODE -ac_ct_CC -CFLAGS -CC RANLIB OPENMP_CXXFLAGS am__fastdepCXX_FALSE @@ -774,8 +767,6 @@ LDFLAGS LIBS CPPFLAGS CCC -CC -CFLAGS CXXCPP' @@ -1430,8 +1421,6 @@ Some influential environment variables: LIBS libraries to pass to the linker, e.g. -l CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if you have headers in a nonstandard directory - CC C compiler command - CFLAGS C compiler flags CXXCPP C++ preprocessor Use these variables to override the choices made by `configure' or to help @@ -3525,2278 +3514,7 @@ else fi #AX_CXX_COMPILE_STDCXX_11(noext, mandatory) -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. -set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:$LINENO: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi - -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:$LINENO: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - - fi -fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. 
- shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" - fi -fi -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:$LINENO: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_CC+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:$LINENO: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$CC" && break - done -fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_CC+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
-else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_CC" && break -done - - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -fi - -fi - - -test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH -See \`config.log' for more details." >&5 -$as_echo "$as_me: error: no acceptable C compiler found in \$PATH -See \`config.log' for more details." >&2;} - { (exit 1); exit 1; }; }; } - -# Provide some information about the compiler. -$as_echo "$as_me:$LINENO: checking for C compiler version" >&5 -set X $ac_compile -ac_compiler=$2 -{ (ac_try="$ac_compiler --version >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compiler --version >&5") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -v >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compiler -v >&5") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } -{ (ac_try="$ac_compiler -V >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compiler -V >&5") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } - -{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if test "${ac_cv_c_compiler_gnu+set}" = set; then - $as_echo_n "(cached) " >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ -#ifndef __GNUC__ - choke me -#endif - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_compiler_gnu=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_compiler_gnu=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu - -fi -{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } -if test $ac_compiler_gnu = yes; then - GCC=yes -else - GCC= -fi -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... " >&6; } -if test "${ac_cv_prog_cc_g+set}" = set; then - $as_echo_n "(cached) " >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - CFLAGS="" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - : -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_g=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag -fi -{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi -{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if test "${ac_cv_prog_cc_c89+set}" = set; then - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -#include -#include -#include -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. 
It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} -_ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ac_cv_prog_cc_c89=$ac_arg -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - -fi - -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break -done -rm -f conftest.$ac_ext -CC=$ac_save_CC - -fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:$LINENO: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:$LINENO: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac - - -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - -depcc="$CC" am_compiler_list= - -{ $as_echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 -$as_echo_n "checking dependency style of $depcc... " >&6; } -if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then - # We make a subdir and do the tests there. Otherwise we can end up - # making bogus files that we don't know about and never remove. For - # instance it was reported that on HP-UX the gcc test will end up - # making a dummy file named `D' -- because `-MD' means `put the output - # in D'. - mkdir conftest.dir - # Copy depcomp to subdir because otherwise we won't find it if we're - # using a relative directory. - cp "$am_depcomp" conftest.dir - cd conftest.dir - # We will build objects and dependencies in a subdirectory because - # it helps to detect inapplicable dependency modes. 
For instance - # both Tru64's cc and ICC support -MD to output dependencies as a - # side effect of compilation, but ICC will put the dependencies in - # the current directory while Tru64 will put them in the object - # directory. - mkdir sub - - am_cv_CC_dependencies_compiler_type=none - if test "$am_compiler_list" = ""; then - am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` - fi - for depmode in $am_compiler_list; do - # Setup a source with many dependencies, because some compilers - # like to wrap large dependency lists on column 80 (with \), and - # we should not choose a depcomp mode which is confused by this. - # - # We need to recreate these files for each test, as the compiler may - # overwrite some of them when testing with obscure command lines. - # This happens at least with the AIX C compiler. - : > sub/conftest.c - for i in 1 2 3 4 5 6; do - echo '#include "conftst'$i'.h"' >> sub/conftest.c - # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with - # Solaris 8's {/usr,}/bin/sh. - touch sub/conftst$i.h - done - echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf - - case $depmode in - nosideeffect) - # after this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - none) break ;; - esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. 
- if depmode=$depmode \ - source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ - depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ - >/dev/null 2>conftest.err && - grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && - ${MAKE-make} -s -f confmf > /dev/null 2>&1; then - # icc doesn't choke on unknown options, it will just issue warnings - # or remarks (even with -Werror). So we grep stderr for any message - # that says an option was ignored or not supported. - # When given -MP, icc 7.0 and 7.1 complain thusly: - # icc: Command line warning: ignoring option '-M'; no argument required - # The diagnosis changed in icc 8.0: - # icc: Command line remark: option '-MP' not supported - if (grep 'ignoring option' conftest.err || - grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else - am_cv_CC_dependencies_compiler_type=$depmode - break - fi - fi - done - - cd .. - rm -rf conftest.dir -else - am_cv_CC_dependencies_compiler_type=none -fi - -fi -{ $as_echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 -$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } -CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type - - if - test "x$enable_dependency_tracking" != xno \ - && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then - am__fastdepCC_TRUE= - am__fastdepCC_FALSE='#' -else - am__fastdepCC_TRUE='#' - am__fastdepCC_FALSE= -fi - - - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -{ $as_echo "$as_me:$LINENO: checking for x86 cpuid output" >&5 -$as_echo_n "checking for x86 cpuid output... 
" >&6; } -if test "${ax_cv_gcc_x86_cpuid_+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then - ax_cv_gcc_x86_cpuid_=unknown -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ - - int op = , eax, ebx, ecx, edx; - FILE *f; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); - f = fopen("conftest_cpuid", "w"); if (!f) return 1; - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); - fclose(f); - return 0; - - ; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ax_cv_gcc_x86_cpuid_=`cat conftest_cpuid`; rm -f conftest_cpuid -else - $as_echo "$as_me: program exited with status $ac_status" >&5 -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ax_cv_gcc_x86_cpuid_=unknown; rm -f conftest_cpuid -fi -rm -rf conftest.dSYM -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_" >&5 -$as_echo "$ax_cv_gcc_x86_cpuid_" >&6; } -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -{ $as_echo "$as_me:$LINENO: checking for x86-AVX xgetbv output" >&5 -$as_echo_n "checking for x86-AVX xgetbv output... " >&6; } -if test "${ax_cv_gcc_x86_avx_xgetbv_+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then - ax_cv_gcc_x86_avx_xgetbv_=unknown -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ -#include -int -main () -{ - - int op = , eax, edx; - FILE *f; - /* Opcodes for xgetbv */ - __asm__(".byte 0x0f, 0x01, 0xd0" - : "=a" (eax), "=d" (edx) - : "c" (op)); - f = fopen("conftest_xgetbv", "w"); if (!f) return 1; - fprintf(f, "%x:%x\n", eax, edx); - fclose(f); - return 0; - - ; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ax_cv_gcc_x86_avx_xgetbv_=`cat conftest_xgetbv`; rm -f conftest_xgetbv -else - $as_echo "$as_me: program exited with status $ac_status" >&5 -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ax_cv_gcc_x86_avx_xgetbv_=unknown; rm -f conftest_xgetbv -fi -rm -rf conftest.dSYM -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_avx_xgetbv_" >&5 -$as_echo "$ax_cv_gcc_x86_avx_xgetbv_" >&6; } -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - - - - - case $host_cpu in - powerpc*) - { $as_echo "$as_me:$LINENO: checking whether altivec is supported" >&5 -$as_echo_n "checking whether altivec is supported... 
" >&6; } -if test "${ax_cv_have_altivec_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then - if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then - ax_cv_have_altivec_ext=yes - fi - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_altivec_ext" >&5 -$as_echo "$ax_cv_have_altivec_ext" >&6; } - - if test "$ax_cv_have_altivec_ext" = yes; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_ALTIVEC /**/ -_ACEOF - - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -faltivec" >&5 -$as_echo_n "checking whether C++ compiler accepts -faltivec... " >&6; } -if test "${ax_cv_check_cxxflags___faltivec+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -faltivec" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___faltivec=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___faltivec=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___faltivec" >&5 -$as_echo "$ax_cv_check_cxxflags___faltivec" >&6; } -if test x"$ax_cv_check_cxxflags___faltivec" = xyes; then - SIMD_FLAGS="$SIMD_FLAGS -faltivec" -else - : -fi - - - fi - ;; - - - i[3456]86*|x86_64*|amd64*) - - - - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -{ $as_echo "$as_me:$LINENO: checking for x86 cpuid 0x00000001 output" >&5 -$as_echo_n "checking for x86 cpuid 0x00000001 output... " >&6; } -if test "${ax_cv_gcc_x86_cpuid_0x00000001+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then - ax_cv_gcc_x86_cpuid_0x00000001=unknown -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ - - int op = 0x00000001, eax, ebx, ecx, edx; - FILE *f; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); - f = fopen("conftest_cpuid", "w"); if (!f) return 1; - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); - fclose(f); - return 0; - - ; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ax_cv_gcc_x86_cpuid_0x00000001=`cat conftest_cpuid`; rm -f conftest_cpuid -else - $as_echo "$as_me: program exited with status $ac_status" >&5 -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ax_cv_gcc_x86_cpuid_0x00000001=unknown; rm -f conftest_cpuid -fi -rm -rf conftest.dSYM -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x00000001" >&5 -$as_echo "$ax_cv_gcc_x86_cpuid_0x00000001" >&6; } -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - - ecx=0 - edx=0 - ebx=0 - if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown"; - then - ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3` - edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4` - fi - - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -{ $as_echo "$as_me:$LINENO: checking for x86 cpuid 0x00000007 output" >&5 -$as_echo_n "checking for x86 cpuid 0x00000007 output... 
" >&6; } -if test "${ax_cv_gcc_x86_cpuid_0x00000007+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then - ax_cv_gcc_x86_cpuid_0x00000007=unknown -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ - - int op = 0x00000007, eax, ebx, ecx, edx; - FILE *f; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); - f = fopen("conftest_cpuid", "w"); if (!f) return 1; - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); - fclose(f); - return 0; - - ; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; }; then - ax_cv_gcc_x86_cpuid_0x00000007=`cat conftest_cpuid`; rm -f conftest_cpuid -else - $as_echo "$as_me: program exited with status $ac_status" >&5 -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ax_cv_gcc_x86_cpuid_0x00000007=unknown; rm -f conftest_cpuid -fi -rm -rf conftest.dSYM -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x00000007" >&5 -$as_echo "$ax_cv_gcc_x86_cpuid_0x00000007" >&6; } -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - - if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown"; - then - ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2` - fi - - { $as_echo "$as_me:$LINENO: checking whether mmx is supported" >&5 -$as_echo_n "checking whether mmx is supported... " >&6; } -if test "${ax_cv_have_mmx_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_mmx_ext=no - if test "$((0x$edx>>23&0x01))" = 1; then - ax_cv_have_mmx_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_mmx_ext" >&5 -$as_echo "$ax_cv_have_mmx_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether sse is supported" >&5 -$as_echo_n "checking whether sse is supported... " >&6; } -if test "${ax_cv_have_sse_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_sse_ext=no - if test "$((0x$edx>>25&0x01))" = 1; then - ax_cv_have_sse_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse_ext" >&5 -$as_echo "$ax_cv_have_sse_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether sse2 is supported" >&5 -$as_echo_n "checking whether sse2 is supported... 
" >&6; } -if test "${ax_cv_have_sse2_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_sse2_ext=no - if test "$((0x$edx>>26&0x01))" = 1; then - ax_cv_have_sse2_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse2_ext" >&5 -$as_echo "$ax_cv_have_sse2_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether sse3 is supported" >&5 -$as_echo_n "checking whether sse3 is supported... " >&6; } -if test "${ax_cv_have_sse3_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_sse3_ext=no - if test "$((0x$ecx&0x01))" = 1; then - ax_cv_have_sse3_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse3_ext" >&5 -$as_echo "$ax_cv_have_sse3_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether ssse3 is supported" >&5 -$as_echo_n "checking whether ssse3 is supported... " >&6; } -if test "${ax_cv_have_ssse3_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_ssse3_ext=no - if test "$((0x$ecx>>9&0x01))" = 1; then - ax_cv_have_ssse3_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_ssse3_ext" >&5 -$as_echo "$ax_cv_have_ssse3_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether sse4.1 is supported" >&5 -$as_echo_n "checking whether sse4.1 is supported... " >&6; } -if test "${ax_cv_have_sse41_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_sse41_ext=no - if test "$((0x$ecx>>19&0x01))" = 1; then - ax_cv_have_sse41_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse41_ext" >&5 -$as_echo "$ax_cv_have_sse41_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether sse4.2 is supported" >&5 -$as_echo_n "checking whether sse4.2 is supported... 
" >&6; } -if test "${ax_cv_have_sse42_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_sse42_ext=no - if test "$((0x$ecx>>20&0x01))" = 1; then - ax_cv_have_sse42_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse42_ext" >&5 -$as_echo "$ax_cv_have_sse42_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether avx is supported by processor" >&5 -$as_echo_n "checking whether avx is supported by processor... " >&6; } -if test "${ax_cv_have_avx_cpu_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_avx_cpu_ext=no - if test "$((0x$ecx>>28&0x01))" = 1; then - ax_cv_have_avx_cpu_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_avx_cpu_ext" >&5 -$as_echo "$ax_cv_have_avx_cpu_ext" >&6; } - - { $as_echo "$as_me:$LINENO: checking whether avx2 is supported by processor" >&5 -$as_echo_n "checking whether avx2 is supported by processor... " >&6; } -if test "${ax_cv_have_avx2_cpu_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_avx2_cpu_ext=no - if test "$((0x$ebx>>5&0x01))" = 1; then - ax_cv_have_avx2_cpu_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_avx2_cpu_ext" >&5 -$as_echo "$ax_cv_have_avx2_cpu_ext" >&6; } - - - { $as_echo "$as_me:$LINENO: checking whether fma is supported by processor" >&5 -$as_echo_n "checking whether fma is supported by processor... 
" >&6; } -if test "${ax_cv_have_fma_cpu_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_fma_cpu_ext=no - if test "$((0x$ecx>>12&0x01))" = 1; then - ax_cv_have_fma_cpu_ext=yes - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_fma_cpu_ext" >&5 -$as_echo "$ax_cv_have_fma_cpu_ext" >&6; } - - - if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then - -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - -{ $as_echo "$as_me:$LINENO: checking for x86-AVX xgetbv 0x00000000 output" >&5 -$as_echo_n "checking for x86-AVX xgetbv 0x00000000 output... " >&6; } -if test "${ax_cv_gcc_x86_avx_xgetbv_0x00000000+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test "$cross_compiling" = yes; then - ax_cv_gcc_x86_avx_xgetbv_0x00000000=unknown -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ - - int op = 0x00000000, eax, edx; - FILE *f; - /* Opcodes for xgetbv */ - __asm__(".byte 0x0f, 0x01, 0xd0" - : "=a" (eax), "=d" (edx) - : "c" (op)); - f = fopen("conftest_xgetbv", "w"); if (!f) return 1; - fprintf(f, "%x:%x\n", eax, edx); - fclose(f); - return 0; - - ; - return 0; -} -_ACEOF -rm -f conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { ac_try='./conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ax_cv_gcc_x86_avx_xgetbv_0x00000000=`cat conftest_xgetbv`; rm -f conftest_xgetbv -else - $as_echo "$as_me: program exited with status $ac_status" >&5 -$as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -( exit $ac_status ) -ax_cv_gcc_x86_avx_xgetbv_0x00000000=unknown; rm -f conftest_xgetbv -fi -rm -rf conftest.dSYM -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext -fi - - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_avx_xgetbv_0x00000000" >&5 -$as_echo "$ax_cv_gcc_x86_avx_xgetbv_0x00000000" >&6; } -ac_ext=cpp -ac_cpp='$CXXCPP $CPPFLAGS' -ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - - - xgetbv_eax="0" - if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then - xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1` - fi - - { $as_echo "$as_me:$LINENO: checking whether avx is supported by operating system" >&5 -$as_echo_n "checking whether avx is supported by operating system... 
" >&6; } -if test "${ax_cv_have_avx_ext+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_cv_have_avx_ext=no - - if test "$((0x$ecx>>27&0x01))" = 1; then - if test "$((0x$xgetbv_eax&0x6))" = 6; then - ax_cv_have_avx_ext=yes - fi - fi - -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_avx_ext" >&5 -$as_echo "$ax_cv_have_avx_ext" >&6; } - if test x"$ax_cv_have_avx_ext" = x"no"; then - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports AVX, but your operating system doesn't" >&5 -$as_echo "$as_me: WARNING: Your processor supports AVX, but your operating system doesn't" >&2;} - fi - fi - - if test "$ax_cv_have_mmx_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mmmx" >&5 -$as_echo_n "checking whether C++ compiler accepts -mmmx... " >&6; } -if test "${ax_cv_check_cxxflags___mmmx+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -mmmx" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___mmmx=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___mmmx=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mmmx" >&5 -$as_echo "$ax_cv_check_cxxflags___mmmx" >&6; } -if test x"$ax_cv_check_cxxflags___mmmx" = xyes; then - ax_cv_support_mmx_ext=yes -else - : -fi - - - if test x"$ax_cv_support_mmx_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mmmx" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_MMX /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports mmx instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports mmx instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_sse_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse" >&5 -$as_echo_n "checking whether C++ compiler accepts -msse... " >&6; } -if test "${ax_cv_check_cxxflags___msse+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -msse" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___msse=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___msse=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse" >&5 -$as_echo "$ax_cv_check_cxxflags___msse" >&6; } -if test x"$ax_cv_check_cxxflags___msse" = xyes; then - ax_cv_support_sse_ext=yes -else - : -fi - - - if test x"$ax_cv_support_sse_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SSE /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports sse instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_sse2_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse2" >&5 -$as_echo_n "checking whether C++ compiler accepts -msse2... " >&6; } -if test "${ax_cv_check_cxxflags___msse2+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -msse2" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? 
- grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___msse2=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___msse2=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse2" >&5 -$as_echo "$ax_cv_check_cxxflags___msse2" >&6; } -if test x"$ax_cv_check_cxxflags___msse2" = xyes; then - ax_cv_support_sse2_ext=yes -else - : -fi - - - if test x"$ax_cv_support_sse2_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse2" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SSE2 /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse2 instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports sse2 instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_sse3_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse3" >&5 -$as_echo_n "checking whether C++ compiler accepts -msse3... " >&6; } -if test "${ax_cv_check_cxxflags___msse3+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -msse3" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. 
*/ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___msse3=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___msse3=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse3" >&5 -$as_echo "$ax_cv_check_cxxflags___msse3" >&6; } -if test x"$ax_cv_check_cxxflags___msse3" = xyes; then - ax_cv_support_sse3_ext=yes -else - : -fi - - - if test x"$ax_cv_support_sse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse3" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SSE3 /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse3 instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports sse3 instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_ssse3_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mssse3" >&5 -$as_echo_n "checking whether C++ compiler accepts -mssse3... " >&6; } -if test "${ax_cv_check_cxxflags___mssse3+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -mssse3" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. 
*/ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___mssse3=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___mssse3=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mssse3" >&5 -$as_echo "$ax_cv_check_cxxflags___mssse3" >&6; } -if test x"$ax_cv_check_cxxflags___mssse3" = xyes; then - ax_cv_support_ssse3_ext=yes -else - : -fi - - - if test x"$ax_cv_support_ssse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mssse3" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SSSE3 /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports ssse3 instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports ssse3 instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_sse41_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse4.1" >&5 -$as_echo_n "checking whether C++ compiler accepts -msse4.1... 
" >&6; } -if test "${ax_cv_check_cxxflags___msse4_1+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -msse4.1" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___msse4_1=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___msse4_1=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse4_1" >&5 -$as_echo "$ax_cv_check_cxxflags___msse4_1" >&6; } -if test x"$ax_cv_check_cxxflags___msse4_1" = xyes; then - ax_cv_support_sse41_ext=yes -else - : -fi - - - if test x"$ax_cv_support_sse41_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse4.1" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SSE4_1 /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?" 
>&2;} - fi - fi - - if test "$ax_cv_have_sse42_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse4.2" >&5 -$as_echo_n "checking whether C++ compiler accepts -msse4.2... " >&6; } -if test "${ax_cv_check_cxxflags___msse4_2+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -msse4.2" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___msse4_2=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___msse4_2=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse4_2" >&5 -$as_echo "$ax_cv_check_cxxflags___msse4_2" >&6; } -if test x"$ax_cv_check_cxxflags___msse4_2" = xyes; then - ax_cv_support_sse42_ext=yes -else - : -fi - - - if test x"$ax_cv_support_sse42_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse4.2" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SSE4_2 /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?" 
>&5 -$as_echo "$as_me: WARNING: Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_avx_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mavx" >&5 -$as_echo_n "checking whether C++ compiler accepts -mavx... " >&6; } -if test "${ax_cv_check_cxxflags___mavx+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -mavx" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___mavx=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___mavx=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mavx" >&5 -$as_echo "$ax_cv_check_cxxflags___mavx" >&6; } -if test x"$ax_cv_check_cxxflags___mavx" = xyes; then - ax_cv_support_avx_ext=yes -else - : -fi - - - if test x"$ax_cv_support_avx_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mavx" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_AVX /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports avx instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports avx instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_avx2_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mavx2" >&5 -$as_echo_n "checking whether C++ compiler accepts -mavx2... " >&6; } -if test "${ax_cv_check_cxxflags___mavx2+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -mavx2" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___mavx2=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___mavx2=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mavx2" >&5 -$as_echo "$ax_cv_check_cxxflags___mavx2" >&6; } -if test x"$ax_cv_check_cxxflags___mavx2" = xyes; then - ax_cv_support_avx2_ext=yes -else - : -fi - - - if test x"$ax_cv_support_avx2_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mavx2" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_AVX2 /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports avx2 instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports avx2 instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - if test "$ax_cv_have_fma_ext" = yes; then - { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mfma" >&5 -$as_echo_n "checking whether C++ compiler accepts -mfma... " >&6; } -if test "${ax_cv_check_cxxflags___mfma+set}" = set; then - $as_echo_n "(cached) " >&6 -else - - ax_check_save_flags=$CXXFLAGS - CXXFLAGS="$CXXFLAGS -mfma" - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - -int -main () -{ - - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" -$as_echo "$ac_try_echo") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? 
- grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && { - test -z "$ac_cxx_werror_flag" || - test ! -s conftest.err - } && test -s conftest.$ac_objext; then - ax_cv_check_cxxflags___mfma=yes -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ax_cv_check_cxxflags___mfma=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - CXXFLAGS=$ax_check_save_flags -fi -{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mfma" >&5 -$as_echo "$ax_cv_check_cxxflags___mfma" >&6; } -if test x"$ax_cv_check_cxxflags___mfma" = xyes; then - ax_cv_support_fma_ext=yes -else - : -fi - - - if test x"$ax_cv_support_fma_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -mfma" - -cat >>confdefs.h <<\_ACEOF -#define HAVE_FMA /**/ -_ACEOF - - else - { $as_echo "$as_me:$LINENO: WARNING: Your processor supports fma instructions but not your compiler, can you try another compiler?" >&5 -$as_echo "$as_me: WARNING: Your processor supports fma instructions but not your compiler, can you try another compiler?" >&2;} - fi - fi - - ;; - esac - - - +AX_EXT # Checks for libraries. #AX_GCC_VAR_ATTRIBUTE(aligned) @@ -8073,7 +5791,7 @@ cat >>confdefs.h <<\_ACEOF _ACEOF supported="cross compilation" - ac_ZMM=yes; + ac_ZMM=no; ;; IMCI) echo Configuring for IMCI @@ -8465,13 +6183,6 @@ $as_echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi -if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then - { { $as_echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. -Usually this means the macro was only invoked conditionally." >&5 -$as_echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. -Usually this means the macro was only invoked conditionally." 
>&2;} - { (exit 1); exit 1; }; } -fi if test -z "${BUILD_ZMM_TRUE}" && test -z "${BUILD_ZMM_FALSE}"; then { { $as_echo "$as_me:$LINENO: error: conditional \"BUILD_ZMM\" was never defined. Usually this means the macro was only invoked conditionally." >&5 diff --git a/configure.ac b/configure.ac index 4da6e3d9..e8734bf6 100644 --- a/configure.ac +++ b/configure.ac @@ -118,7 +118,7 @@ case ${ac_SIMD} in echo Configuring for AVX512 AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] ) supported="cross compilation" - ac_ZMM=yes; + ac_ZMM=no; ;; IMCI) echo Configuring for IMCI diff --git a/lib/Config.h.in b/lib/Config.h.in index 4d6354fa..28533b5d 100644 --- a/lib/Config.h.in +++ b/lib/Config.h.in @@ -30,15 +30,6 @@ /* GRID_DEFAULT_PRECISION is SINGLE */ #undef GRID_DEFAULT_PRECISION_SINGLE -/* Support Altivec instructions */ -#undef HAVE_ALTIVEC - -/* Support AVX (Advanced Vector Extensions) instructions */ -#undef HAVE_AVX - -/* Support AVX2 (Advanced Vector Extensions 2) instructions */ -#undef HAVE_AVX2 - /* Define to 1 if you have the declaration of `be64toh', and to 0 if you don't. */ #undef HAVE_DECL_BE64TOH @@ -53,9 +44,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_EXECINFO_H -/* Support FMA3 (Fused Multiply-Add) instructions */ -#undef HAVE_FMA - /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -74,30 +62,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H -/* Support mmx instructions */ -#undef HAVE_MMX - /* Define to 1 if you have the header file. 
*/ #undef HAVE_MM_MALLOC_H -/* Support SSE (Streaming SIMD Extensions) instructions */ -#undef HAVE_SSE - -/* Support SSE2 (Streaming SIMD Extensions 2) instructions */ -#undef HAVE_SSE2 - -/* Support SSE3 (Streaming SIMD Extensions 3) instructions */ -#undef HAVE_SSE3 - -/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */ -#undef HAVE_SSE4_1 - -/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */ -#undef HAVE_SSE4_2 - -/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */ -#undef HAVE_SSSE3 - /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 7410024f..219e1a1d 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -596,7 +596,7 @@ void WilsonKernels::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, vstream(out._odata[sF],result); } -#if ( ! defined(AVX512) ) && ( ! defined(IMCI) ) +#if ( ! 
defined(IMCI) ) template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index efabb610..e5174484 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -27,7 +27,8 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ #include -#if defined(AVX512) || defined (IMCI) +//#if defined(AVX512) || defined (IMCI) +#if defined (IMCI) #include diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 12a168ef..0a95960e 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -39,7 +39,7 @@ Author: paboyle #include - +namespace Grid{ namespace Optimization { struct Vsplat{ @@ -345,7 +345,7 @@ namespace Optimization { ////////////////////////////////////////////////////////////////////////////////////// // Here assign types -namespace Grid { + typedef __m512 SIMD_Ftype; // Single precision type typedef __m512d SIMD_Dtype; // Double precision type typedef __m512i SIMD_Itype; // Integer type From 165bffc2e70c8c5fb55be4356d33567156c8f106 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 26 Mar 2016 22:24:07 -0600 Subject: [PATCH 2/7] Avx512 changes for assembler kernels --- configure | 2733 +++++++++++++++++-- configure.ac | 2 +- lib/Config.h.in | 33 + lib/Grid.h | 1 + lib/PerfCount.h | 14 +- lib/Simd.h | 1 + lib/qcd/action/fermion/WilsonFermion.cc | 64 +- lib/qcd/action/fermion/WilsonFermion5D.cc | 130 +- lib/qcd/action/fermion/WilsonKernels.cc | 286 +- lib/qcd/action/fermion/WilsonKernels.h | 10 +- lib/qcd/action/fermion/WilsonKernelsAsm.cc | 9 +- lib/qcd/action/fermion/WilsonKernelsHand.cc | 566 +--- lib/simd/Avx512Asm.h | 83 +- tests/Test_zmm.cc | 6 +- 14 files changed, 2806 insertions(+), 1132 deletions(-) diff --git a/configure b/configure index cbc70b99..4d465a64 100755 --- a/configure 
+++ b/configure @@ -637,7 +637,9 @@ ac_includes_default="\ # include #endif" -ac_subst_vars='LTLIBOBJS +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS LIBOBJS USE_LAPACK_LIB_FALSE USE_LAPACK_LIB_TRUE @@ -656,6 +658,13 @@ BUILD_ZMM_TRUE EGREP GREP CXXCPP +SIMD_FLAGS +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +ac_ct_CC +CFLAGS +CC RANLIB OPENMP_CXXFLAGS am__fastdepCXX_FALSE @@ -674,6 +683,8 @@ CPPFLAGS LDFLAGS CXXFLAGS CXX +AM_BACKSLASH +AM_DEFAULT_VERBOSITY am__untar am__tar AMTAR @@ -749,6 +760,7 @@ SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking +enable_silent_rules enable_dependency_tracking enable_openmp enable_simd @@ -767,6 +779,8 @@ LDFLAGS LIBS CPPFLAGS CCC +CC +CFLAGS CXXCPP' @@ -1399,6 +1413,8 @@ Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build output (undo: `make V=1') + --disable-silent-rules verbose build output (undo: `make V=0') --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors --disable-openmp do not use OpenMP @@ -1421,6 +1437,8 @@ Some influential environment variables: LIBS libraries to pass to the linker, e.g. -l CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if you have headers in a nonstandard directory + CC C compiler command + CFLAGS C compiler flags CXXCPP C++ preprocessor Use these variables to override the choices made by `configure' or to help @@ -2028,7 +2046,7 @@ test -n "$target_alias" && test "$program_prefix$program_suffix$program_transform_name" = \ NONENONEs,x,x, && program_prefix=${target_alias}- -am__api_version='1.10' +am__api_version='1.11' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. 
But avoid the broken or @@ -2128,16 +2146,33 @@ $as_echo_n "checking whether build environment is sane... " >&6; } # Just in case sleep 1 echo timestamp > conftest.file +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + { { $as_echo "$as_me:$LINENO: error: unsafe absolute working directory name" >&5 +$as_echo "$as_me: error: unsafe absolute working directory name" >&2;} + { (exit 1); exit 1; }; };; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + { { $as_echo "$as_me:$LINENO: error: unsafe srcdir value: \`$srcdir'" >&5 +$as_echo "$as_me: error: unsafe srcdir value: \`$srcdir'" >&2;} + { (exit 1); exit 1; }; };; +esac + # Do `set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( - set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null` + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$*" = "X"; then # -L didn't work. 
- set X `ls -t $srcdir/configure conftest.file` + set X `ls -t "$srcdir/configure" conftest.file` fi rm -f conftest.file if test "$*" != "X $srcdir/configure conftest.file" \ @@ -2181,7 +2216,14 @@ program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` # expand $ac_aux_dir to an absolute path am_aux_dir=`cd $ac_aux_dir && pwd` -test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing" +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi # Use eval to expand $SHELL if eval "$MISSING --run true"; then am_missing_run="$MISSING --run " @@ -2191,6 +2233,115 @@ else $as_echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;} fi +if test x"${install_sh}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using `strip' when the user +# run `make install-strip'. However `strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the `STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_STRIP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:$LINENO: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + { $as_echo "$as_me:$LINENO: checking for a thread-safe mkdir -p" >&5 $as_echo_n "checking for a thread-safe mkdir -p... " >&6; } if test -z "$MKDIR_P"; then @@ -2373,108 +2524,6 @@ AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} -install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"} - -# Installed binaries are usually stripped using `strip' when the user -# run `make install-strip'. However `strip' might not be the right -# tool to use in cross-compilation environments, therefore Automake -# will honor the `STRIP' environment variable to overrule this program. -if test "$cross_compiling" != no; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. -set dummy ${ac_tool_prefix}strip; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... 
" >&6; } -if test "${ac_cv_prog_STRIP+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$STRIP"; then - ac_cv_prog_STRIP="$STRIP" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_STRIP="${ac_tool_prefix}strip" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -STRIP=$ac_cv_prog_STRIP -if test -n "$STRIP"; then - { $as_echo "$as_me:$LINENO: result: $STRIP" >&5 -$as_echo "$STRIP" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - -fi -if test -z "$ac_cv_prog_STRIP"; then - ac_ct_STRIP=$STRIP - # Extract the first word of "strip", so it can be a program name with args. -set dummy strip; ac_word=$2 -{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_STRIP"; then - ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_prog_ac_ct_STRIP="strip" - $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - -fi -fi -ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP -if test -n "$ac_ct_STRIP"; then - { $as_echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 -$as_echo "$ac_ct_STRIP" >&6; } -else - { $as_echo "$as_me:$LINENO: result: no" >&5 -$as_echo "no" >&6; } -fi - - if test "x$ac_ct_STRIP" = x; then - STRIP=":" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - STRIP=$ac_ct_STRIP - fi -else - STRIP="$ac_cv_prog_STRIP" -fi - -fi -INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" - # We need awk for the "check" target. The system "awk" is bad on # some platforms. # Always define AMTAR for backward compatibility. @@ -2491,6 +2540,17 @@ am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -' ac_config_headers="$ac_config_headers lib/Config.h" +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in +yes) AM_DEFAULT_VERBOSITY=0;; +no) AM_DEFAULT_VERBOSITY=1;; +*) AM_DEFAULT_VERBOSITY=0;; +esac +AM_BACKSLASH='\' { $as_echo "$as_me:$LINENO: @@ -3142,7 +3202,7 @@ ac_config_commands="$ac_config_commands depfiles" am_make=${MAKE-make} cat > confinc << 'END' am__doit: - @echo done + @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just comment out the code. @@ -3153,24 +3213,24 @@ am__quote= _am_result=none # First try GNU make style include. 
echo "include confinc" > confmf -# We grep out `Entering directory' and `Leaving directory' -# messages which can occur if `w' ends up in MAKEFLAGS. -# In particular we don't look at `^make:' because GNU make might -# be invoked under some other name (usually "gmake"), in which -# case it prints its new name instead of `make'. -if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then - am__include=include - am__quote= - _am_result=GNU -fi +# Ignore all kinds of additional output from `make'. +case `$am_make -s -f confmf 2> /dev/null` in #( +*the\ am__doit\ target*) + am__include=include + am__quote= + _am_result=GNU + ;; +esac # Now try BSD make style include. if test "$am__include" = "#"; then echo '.include "confinc"' > confmf - if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then - am__include=.include - am__quote="\"" - _am_result=BSD - fi + case `$am_make -s -f confmf 2> /dev/null` in #( + *the\ am__doit\ target*) + am__include=.include + am__quote="\"" + _am_result=BSD + ;; + esac fi @@ -3227,6 +3287,11 @@ else if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and @@ -3244,7 +3309,17 @@ else done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. 
+ test "$am__universal" = false || continue + ;; nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested @@ -3254,19 +3329,23 @@ else break fi ;; + msvisualcpp | msvcmsys) + # This compiler won't grok `-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; none) break ;; esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. if depmode=$depmode \ - source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ + source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ - $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && - grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message @@ -3514,7 +3593,2297 @@ else fi #AX_CXX_COMPILE_STDCXX_11(noext, mandatory) -AX_EXT +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; }; } + +# Provide some information about the compiler. +$as_echo "$as_me:$LINENO: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_compiler_gnu=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. 
It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_c89=$ac_arg +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:$LINENO: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:$LINENO: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named `D' -- because `-MD' means `put the output + # in D'. + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with + # Solaris 8's {/usr,}/bin/sh. + touch sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with `-c' and `-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle `-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. 
+ test "$am__universal" = false || continue + ;; + nosideeffect) + # after this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvisualcpp | msvcmsys) + # This compiler won't grok `-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:$LINENO: checking for x86 cpuid output" >&5 +$as_echo_n "checking for x86 cpuid output... " >&6; } +if test "${ax_cv_gcc_x86_cpuid_+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then + ax_cv_gcc_x86_cpuid_=unknown +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ + + int op = , eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ax_cv_gcc_x86_cpuid_=`cat conftest_cpuid`; rm -f conftest_cpuid +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ax_cv_gcc_x86_cpuid_=unknown; rm -f conftest_cpuid +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_" >&5 +$as_echo "$ax_cv_gcc_x86_cpuid_" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:$LINENO: checking for x86-AVX xgetbv output" >&5 +$as_echo_n "checking for x86-AVX xgetbv output... " >&6; } +if test "${ax_cv_gcc_x86_avx_xgetbv_+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then + ax_cv_gcc_x86_avx_xgetbv_=unknown +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +int +main () +{ + + int op = , eax, edx; + FILE *f; + /* Opcodes for xgetbv */ + __asm__(".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (op)); + f = fopen("conftest_xgetbv", "w"); if (!f) return 1; + fprintf(f, "%x:%x\n", eax, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ax_cv_gcc_x86_avx_xgetbv_=`cat conftest_xgetbv`; rm -f conftest_xgetbv +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ax_cv_gcc_x86_avx_xgetbv_=unknown; rm -f conftest_xgetbv +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_avx_xgetbv_" >&5 +$as_echo "$ax_cv_gcc_x86_avx_xgetbv_" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + + + case $host_cpu in + powerpc*) + { $as_echo "$as_me:$LINENO: checking whether altivec is supported" >&5 +$as_echo_n "checking whether altivec is supported... 
" >&6; } +if test "${ax_cv_have_altivec_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then + if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then + ax_cv_have_altivec_ext=yes + fi + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_altivec_ext" >&5 +$as_echo "$ax_cv_have_altivec_ext" >&6; } + + if test "$ax_cv_have_altivec_ext" = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ALTIVEC /**/ +_ACEOF + + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -faltivec" >&5 +$as_echo_n "checking whether C++ compiler accepts -faltivec... " >&6; } +if test "${ax_cv_check_cxxflags___faltivec+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -faltivec" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___faltivec=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___faltivec=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___faltivec" >&5 +$as_echo "$ax_cv_check_cxxflags___faltivec" >&6; } +if test x"$ax_cv_check_cxxflags___faltivec" = xyes; then + SIMD_FLAGS="$SIMD_FLAGS -faltivec" +else + : +fi + + + fi + ;; + + + i[3456]86*|x86_64*|amd64*) + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:$LINENO: checking for x86 cpuid 0x00000001 output" >&5 +$as_echo_n "checking for x86 cpuid 0x00000001 output... " >&6; } +if test "${ax_cv_gcc_x86_cpuid_0x00000001+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then + ax_cv_gcc_x86_cpuid_0x00000001=unknown +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ + + int op = 0x00000001, eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ax_cv_gcc_x86_cpuid_0x00000001=`cat conftest_cpuid`; rm -f conftest_cpuid +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ax_cv_gcc_x86_cpuid_0x00000001=unknown; rm -f conftest_cpuid +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x00000001" >&5 +$as_echo "$ax_cv_gcc_x86_cpuid_0x00000001" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + ecx=0 + edx=0 + ebx=0 + if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown"; + then + ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3` + edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4` + fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:$LINENO: checking for x86 cpuid 0x00000007 output" >&5 +$as_echo_n "checking for x86 cpuid 0x00000007 output... 
" >&6; } +if test "${ax_cv_gcc_x86_cpuid_0x00000007+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then + ax_cv_gcc_x86_cpuid_0x00000007=unknown +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ + + int op = 0x00000007, eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ax_cv_gcc_x86_cpuid_0x00000007=`cat conftest_cpuid`; rm -f conftest_cpuid +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ax_cv_gcc_x86_cpuid_0x00000007=unknown; rm -f conftest_cpuid +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x00000007" >&5 +$as_echo "$ax_cv_gcc_x86_cpuid_0x00000007" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown"; + then + ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2` + fi + + { $as_echo "$as_me:$LINENO: checking whether mmx is supported" >&5 +$as_echo_n "checking whether mmx is supported... " >&6; } +if test "${ax_cv_have_mmx_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_mmx_ext=no + if test "$((0x$edx>>23&0x01))" = 1; then + ax_cv_have_mmx_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_mmx_ext" >&5 +$as_echo "$ax_cv_have_mmx_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether sse is supported" >&5 +$as_echo_n "checking whether sse is supported... " >&6; } +if test "${ax_cv_have_sse_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse_ext=no + if test "$((0x$edx>>25&0x01))" = 1; then + ax_cv_have_sse_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse_ext" >&5 +$as_echo "$ax_cv_have_sse_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether sse2 is supported" >&5 +$as_echo_n "checking whether sse2 is supported... 
" >&6; } +if test "${ax_cv_have_sse2_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse2_ext=no + if test "$((0x$edx>>26&0x01))" = 1; then + ax_cv_have_sse2_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse2_ext" >&5 +$as_echo "$ax_cv_have_sse2_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether sse3 is supported" >&5 +$as_echo_n "checking whether sse3 is supported... " >&6; } +if test "${ax_cv_have_sse3_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse3_ext=no + if test "$((0x$ecx&0x01))" = 1; then + ax_cv_have_sse3_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse3_ext" >&5 +$as_echo "$ax_cv_have_sse3_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether ssse3 is supported" >&5 +$as_echo_n "checking whether ssse3 is supported... " >&6; } +if test "${ax_cv_have_ssse3_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_ssse3_ext=no + if test "$((0x$ecx>>9&0x01))" = 1; then + ax_cv_have_ssse3_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_ssse3_ext" >&5 +$as_echo "$ax_cv_have_ssse3_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether sse4.1 is supported" >&5 +$as_echo_n "checking whether sse4.1 is supported... " >&6; } +if test "${ax_cv_have_sse41_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse41_ext=no + if test "$((0x$ecx>>19&0x01))" = 1; then + ax_cv_have_sse41_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse41_ext" >&5 +$as_echo "$ax_cv_have_sse41_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether sse4.2 is supported" >&5 +$as_echo_n "checking whether sse4.2 is supported... 
" >&6; } +if test "${ax_cv_have_sse42_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse42_ext=no + if test "$((0x$ecx>>20&0x01))" = 1; then + ax_cv_have_sse42_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_sse42_ext" >&5 +$as_echo "$ax_cv_have_sse42_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether avx is supported by processor" >&5 +$as_echo_n "checking whether avx is supported by processor... " >&6; } +if test "${ax_cv_have_avx_cpu_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_avx_cpu_ext=no + if test "$((0x$ecx>>28&0x01))" = 1; then + ax_cv_have_avx_cpu_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_avx_cpu_ext" >&5 +$as_echo "$ax_cv_have_avx_cpu_ext" >&6; } + + { $as_echo "$as_me:$LINENO: checking whether avx2 is supported by processor" >&5 +$as_echo_n "checking whether avx2 is supported by processor... " >&6; } +if test "${ax_cv_have_avx2_cpu_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_avx2_cpu_ext=no + if test "$((0x$ebx>>5&0x01))" = 1; then + ax_cv_have_avx2_cpu_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_avx2_cpu_ext" >&5 +$as_echo "$ax_cv_have_avx2_cpu_ext" >&6; } + + + { $as_echo "$as_me:$LINENO: checking whether fma is supported by processor" >&5 +$as_echo_n "checking whether fma is supported by processor... 
" >&6; } +if test "${ax_cv_have_fma_cpu_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_fma_cpu_ext=no + if test "$((0x$ecx>>12&0x01))" = 1; then + ax_cv_have_fma_cpu_ext=yes + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_fma_cpu_ext" >&5 +$as_echo "$ax_cv_have_fma_cpu_ext" >&6; } + + + if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:$LINENO: checking for x86-AVX xgetbv 0x00000000 output" >&5 +$as_echo_n "checking for x86-AVX xgetbv 0x00000000 output... " >&6; } +if test "${ax_cv_gcc_x86_avx_xgetbv_0x00000000+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then + ax_cv_gcc_x86_avx_xgetbv_0x00000000=unknown +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ + + int op = 0x00000000, eax, edx; + FILE *f; + /* Opcodes for xgetbv */ + __asm__(".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (op)); + f = fopen("conftest_xgetbv", "w"); if (!f) return 1; + fprintf(f, "%x:%x\n", eax, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ax_cv_gcc_x86_avx_xgetbv_0x00000000=`cat conftest_xgetbv`; rm -f conftest_xgetbv +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ax_cv_gcc_x86_avx_xgetbv_0x00000000=unknown; rm -f conftest_xgetbv +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_avx_xgetbv_0x00000000" >&5 +$as_echo "$ax_cv_gcc_x86_avx_xgetbv_0x00000000" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + xgetbv_eax="0" + if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then + xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1` + fi + + { $as_echo "$as_me:$LINENO: checking whether avx is supported by operating system" >&5 +$as_echo_n "checking whether avx is supported by operating system... 
" >&6; } +if test "${ax_cv_have_avx_ext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_avx_ext=no + + if test "$((0x$ecx>>27&0x01))" = 1; then + if test "$((0x$xgetbv_eax&0x6))" = 6; then + ax_cv_have_avx_ext=yes + fi + fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_have_avx_ext" >&5 +$as_echo "$ax_cv_have_avx_ext" >&6; } + if test x"$ax_cv_have_avx_ext" = x"no"; then + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports AVX, but your operating system doesn't" >&5 +$as_echo "$as_me: WARNING: Your processor supports AVX, but your operating system doesn't" >&2;} + fi + fi + + if test "$ax_cv_have_mmx_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mmmx" >&5 +$as_echo_n "checking whether C++ compiler accepts -mmmx... " >&6; } +if test "${ax_cv_check_cxxflags___mmmx+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mmmx" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___mmmx=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___mmmx=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mmmx" >&5 +$as_echo "$ax_cv_check_cxxflags___mmmx" >&6; } +if test x"$ax_cv_check_cxxflags___mmmx" = xyes; then + ax_cv_support_mmx_ext=yes +else + : +fi + + + if test x"$ax_cv_support_mmx_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mmmx" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MMX /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports mmx instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports mmx instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse... " >&6; } +if test "${ax_cv_check_cxxflags___msse+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___msse=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___msse=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse" >&5 +$as_echo "$ax_cv_check_cxxflags___msse" >&6; } +if test x"$ax_cv_check_cxxflags___msse" = xyes; then + ax_cv_support_sse_ext=yes +else + : +fi + + + if test x"$ax_cv_support_sse_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SSE /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse2_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse2" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse2... " >&6; } +if test "${ax_cv_check_cxxflags___msse2+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse2" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___msse2=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___msse2=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse2" >&5 +$as_echo "$ax_cv_check_cxxflags___msse2" >&6; } +if test x"$ax_cv_check_cxxflags___msse2" = xyes; then + ax_cv_support_sse2_ext=yes +else + : +fi + + + if test x"$ax_cv_support_sse2_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse2" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SSE2 /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse2 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse2 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse3_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse3" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse3... " >&6; } +if test "${ax_cv_check_cxxflags___msse3+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse3" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___msse3=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___msse3=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse3" >&5 +$as_echo "$ax_cv_check_cxxflags___msse3" >&6; } +if test x"$ax_cv_check_cxxflags___msse3" = xyes; then + ax_cv_support_sse3_ext=yes +else + : +fi + + + if test x"$ax_cv_support_sse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse3" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SSE3 /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse3 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse3 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_ssse3_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mssse3" >&5 +$as_echo_n "checking whether C++ compiler accepts -mssse3... " >&6; } +if test "${ax_cv_check_cxxflags___mssse3+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mssse3" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___mssse3=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___mssse3=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mssse3" >&5 +$as_echo "$ax_cv_check_cxxflags___mssse3" >&6; } +if test x"$ax_cv_check_cxxflags___mssse3" = xyes; then + ax_cv_support_ssse3_ext=yes +else + : +fi + + + if test x"$ax_cv_support_ssse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mssse3" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SSSE3 /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports ssse3 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports ssse3 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse41_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse4.1" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse4.1... 
" >&6; } +if test "${ax_cv_check_cxxflags___msse4_1+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse4.1" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___msse4_1=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___msse4_1=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse4_1" >&5 +$as_echo "$ax_cv_check_cxxflags___msse4_1" >&6; } +if test x"$ax_cv_check_cxxflags___msse4_1" = xyes; then + ax_cv_support_sse41_ext=yes +else + : +fi + + + if test x"$ax_cv_support_sse41_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse4.1" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SSE4_1 /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?" 
>&2;} + fi + fi + + if test "$ax_cv_have_sse42_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -msse4.2" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse4.2... " >&6; } +if test "${ax_cv_check_cxxflags___msse4_2+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse4.2" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___msse4_2=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___msse4_2=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___msse4_2" >&5 +$as_echo "$ax_cv_check_cxxflags___msse4_2" >&6; } +if test x"$ax_cv_check_cxxflags___msse4_2" = xyes; then + ax_cv_support_sse42_ext=yes +else + : +fi + + + if test x"$ax_cv_support_sse42_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse4.2" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SSE4_2 /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?" 
>&5 +$as_echo "$as_me: WARNING: Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_avx_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mavx" >&5 +$as_echo_n "checking whether C++ compiler accepts -mavx... " >&6; } +if test "${ax_cv_check_cxxflags___mavx+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mavx" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___mavx=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___mavx=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mavx" >&5 +$as_echo "$ax_cv_check_cxxflags___mavx" >&6; } +if test x"$ax_cv_check_cxxflags___mavx" = xyes; then + ax_cv_support_avx_ext=yes +else + : +fi + + + if test x"$ax_cv_support_avx_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mavx" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AVX /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports avx instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports avx instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_avx2_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mavx2" >&5 +$as_echo_n "checking whether C++ compiler accepts -mavx2... " >&6; } +if test "${ax_cv_check_cxxflags___mavx2+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mavx2" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___mavx2=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___mavx2=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mavx2" >&5 +$as_echo "$ax_cv_check_cxxflags___mavx2" >&6; } +if test x"$ax_cv_check_cxxflags___mavx2" = xyes; then + ax_cv_support_avx2_ext=yes +else + : +fi + + + if test x"$ax_cv_support_avx2_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mavx2" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_AVX2 /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports avx2 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports avx2 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_fma_ext" = yes; then + { $as_echo "$as_me:$LINENO: checking whether C++ compiler accepts -mfma" >&5 +$as_echo_n "checking whether C++ compiler accepts -mfma... " >&6; } +if test "${ax_cv_check_cxxflags___mfma+set}" = set; then + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mfma" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ax_cv_check_cxxflags___mfma=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ax_cv_check_cxxflags___mfma=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:$LINENO: result: $ax_cv_check_cxxflags___mfma" >&5 +$as_echo "$ax_cv_check_cxxflags___mfma" >&6; } +if test x"$ax_cv_check_cxxflags___mfma" = xyes; then + ax_cv_support_fma_ext=yes +else + : +fi + + + if test x"$ax_cv_support_fma_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mfma" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FMA /**/ +_ACEOF + + else + { $as_echo "$as_me:$LINENO: WARNING: Your processor supports fma instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports fma instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + ;; + esac + + + # Checks for libraries. #AX_GCC_VAR_ATTRIBUTE(aligned) @@ -5791,7 +8160,7 @@ cat >>confdefs.h <<\_ACEOF _ACEOF supported="cross compilation" - ac_ZMM=no; + ac_ZMM=yes; ;; IMCI) echo Configuring for IMCI @@ -6169,6 +8538,14 @@ LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then { { $as_echo "$as_me:$LINENO: error: conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." >&5 @@ -6183,6 +8560,13 @@ $as_echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." 
>&2;} { (exit 1); exit 1; }; } fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + { { $as_echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +$as_echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi if test -z "${BUILD_ZMM_TRUE}" && test -z "${BUILD_ZMM_FALSE}"; then { { $as_echo "$as_me:$LINENO: error: conditional \"BUILD_ZMM\" was never defined. Usually this means the macro was only invoked conditionally." >&5 @@ -7401,18 +9785,28 @@ $as_echo "$as_me: executing $ac_file commands" >&6;} case $ac_file$ac_mode in - "depfiles":C) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do - # Strip MF so we end up with the name of the file. - mf=`echo "$mf" | sed -e 's/:.*$//'` - # Check whether this is an Automake generated Makefile or not. - # We used to match only the files named `Makefile.in', but - # some people rename them; so instead we look at the file content. - # Grep'ing the first line is not enough: some people post-process - # each Makefile.in and add a new line on top of each file to say so. - # Grep'ing the whole file is not good either: AIX grep has a line - # limit of 2048, but all sed's we know have understand at least 4000. - if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then - dirpart=`$as_dirname -- "$mf" || + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Autoconf 2.62 quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + case $CONFIG_FILES in + *\'*) eval set x "$CONFIG_FILES" ;; + *) set x $CONFIG_FILES ;; + esac + shift + for mf + do + # Strip MF so we end up with the name of the file. 
+ mf=`echo "$mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile or not. + # We used to match only the files named `Makefile.in', but + # some people rename them; so instead we look at the file content. + # Grep'ing the first line is not enough: some people post-process + # each Makefile.in and add a new line on top of each file to say so. + # Grep'ing the whole file is not good either: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. + if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then + dirpart=`$as_dirname -- "$mf" || $as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$mf" : 'X\(//\)[^/]' \| \ X"$mf" : 'X\(//\)$' \| \ @@ -7435,28 +9829,28 @@ $as_echo X"$mf" | q } s/.*/./; q'` - else - continue - fi - # Extract the definition of DEPDIR, am__include, and am__quote - # from the Makefile without running `make'. - DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` - test -z "$DEPDIR" && continue - am__include=`sed -n 's/^am__include = //p' < "$mf"` - test -z "am__include" && continue - am__quote=`sed -n 's/^am__quote = //p' < "$mf"` - # When using ansi2knr, U may be empty or an underscore; expand it - U=`sed -n 's/^U = //p' < "$mf"` - # Find all dependency output files, they are included files with - # $(DEPDIR) in their names. We invoke sed twice because it is the - # simplest approach to changing $(DEPDIR) to its actual value in the - # expansion. - for file in `sed -n " - s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ - sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do - # Make sure the directory exists. - test -f "$dirpart/$file" && continue - fdir=`$as_dirname -- "$file" || + else + continue + fi + # Extract the definition of DEPDIR, am__include, and am__quote + # from the Makefile without running `make'. 
+ DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` + test -z "$DEPDIR" && continue + am__include=`sed -n 's/^am__include = //p' < "$mf"` + test -z "am__include" && continue + am__quote=`sed -n 's/^am__quote = //p' < "$mf"` + # When using ansi2knr, U may be empty or an underscore; expand it + U=`sed -n 's/^U = //p' < "$mf"` + # Find all dependency output files, they are included files with + # $(DEPDIR) in their names. We invoke sed twice because it is the + # simplest approach to changing $(DEPDIR) to its actual value in the + # expansion. + for file in `sed -n " + s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ + sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do + # Make sure the directory exists. + test -f "$dirpart/$file" && continue + fdir=`$as_dirname -- "$file" || $as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$file" : 'X\(//\)[^/]' \| \ X"$file" : 'X\(//\)$' \| \ @@ -7479,7 +9873,7 @@ $as_echo X"$file" | q } s/.*/./; q'` - { as_dir=$dirpart/$fdir + { as_dir=$dirpart/$fdir case $as_dir in #( -*) as_dir=./$as_dir;; esac @@ -7520,10 +9914,11 @@ $as_echo X"$as_dir" | } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 $as_echo "$as_me: error: cannot create directory $as_dir" >&2;} { (exit 1); exit 1; }; }; } - # echo "creating $dirpart/$file" - echo '# dummy' > "$dirpart/$file" + # echo "creating $dirpart/$file" + echo '# dummy' > "$dirpart/$file" + done done -done +} ;; esac diff --git a/configure.ac b/configure.ac index e8734bf6..4da6e3d9 100644 --- a/configure.ac +++ b/configure.ac @@ -118,7 +118,7 @@ case ${ac_SIMD} in echo Configuring for AVX512 AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] ) supported="cross compilation" - ac_ZMM=no; + ac_ZMM=yes; ;; IMCI) echo Configuring for IMCI diff --git a/lib/Config.h.in b/lib/Config.h.in index 28533b5d..4d6354fa 100644 --- a/lib/Config.h.in +++ b/lib/Config.h.in @@ -30,6 +30,15 @@ /* GRID_DEFAULT_PRECISION is 
SINGLE */ #undef GRID_DEFAULT_PRECISION_SINGLE +/* Support Altivec instructions */ +#undef HAVE_ALTIVEC + +/* Support AVX (Advanced Vector Extensions) instructions */ +#undef HAVE_AVX + +/* Support AVX2 (Advanced Vector Extensions 2) instructions */ +#undef HAVE_AVX2 + /* Define to 1 if you have the declaration of `be64toh', and to 0 if you don't. */ #undef HAVE_DECL_BE64TOH @@ -44,6 +53,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_EXECINFO_H +/* Support FMA3 (Fused Multiply-Add) instructions */ +#undef HAVE_FMA + /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -62,9 +74,30 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H +/* Support mmx instructions */ +#undef HAVE_MMX + /* Define to 1 if you have the header file. */ #undef HAVE_MM_MALLOC_H +/* Support SSE (Streaming SIMD Extensions) instructions */ +#undef HAVE_SSE + +/* Support SSE2 (Streaming SIMD Extensions 2) instructions */ +#undef HAVE_SSE2 + +/* Support SSE3 (Streaming SIMD Extensions 3) instructions */ +#undef HAVE_SSE3 + +/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */ +#undef HAVE_SSE4_1 + +/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */ +#undef HAVE_SSE4_2 + +/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */ +#undef HAVE_SSSE3 + /* Define to 1 if you have the header file. 
*/ #undef HAVE_STDINT_H diff --git a/lib/Grid.h b/lib/Grid.h index 48a59893..eb2be1d1 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -62,6 +62,7 @@ Author: paboyle #include #include #include +#include #include #include #include diff --git a/lib/PerfCount.h b/lib/PerfCount.h index a45b1e23..c4ee8eea 100644 --- a/lib/PerfCount.h +++ b/lib/PerfCount.h @@ -34,7 +34,7 @@ Author: paboyle #include #include #include - +#include #include #ifdef __linux__ @@ -163,8 +163,8 @@ public: { #ifdef __linux__ if ( fd!= -1) { - ioctl(fd, PERF_EVENT_IOC_RESET, 0); - ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); + ::ioctl(fd, PERF_EVENT_IOC_RESET, 0); + ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); } begin =cyclecount(); #else @@ -176,7 +176,7 @@ public: count=0; #ifdef __linux__ if ( fd!= -1) { - ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); + ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ::read(fd, &count, sizeof(long long)); } elapsed = cyclecount() - begin; @@ -187,16 +187,16 @@ public: } void Report(void) { #ifdef __linux__ - printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count); + std::printf("%llu cycles %s = %20llu\n", elapsed , PerformanceCounterConfigs[PCT].name, count); #else - printf("%llu cycles \n", elapsed ); + std::printf("%llu cycles \n", elapsed ); #endif } ~PerformanceCounter() { #ifdef __linux__ - close(fd); + ::close(fd); #endif } diff --git a/lib/Simd.h b/lib/Simd.h index ac3a5f88..27a5ec46 100644 --- a/lib/Simd.h +++ b/lib/Simd.h @@ -42,6 +42,7 @@ Author: paboyle #define _MM_SELECT_FOUR_FOUR(A,B,C,D) ((A<<6)|(B<<4)|(C<<2)|(D)) +#define _MM_SELECT_FOUR_FOUR_STRING(A,B,C,D) "((" #A "<<6)|(" #B "<<4)|(" #C "<<2)|(" #D "))" #define _MM_SELECT_EIGHT_TWO(A,B,C,D,E,F,G,H) ((A<<7)|(B<<6)|(C<<5)|(D<<4)|(E<<3)|(F<<2)|(G<<4)|(H)) #define _MM_SELECT_FOUR_TWO (A,B,C,D) _MM_SELECT_EIGHT_TWO(0,0,0,0,A,B,C,D) #define _MM_SELECT_TWO_TWO (A,B) _MM_SELECT_FOUR_TWO(0,0,A,B) diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 
502a28bb..d874e0ac 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -335,69 +335,7 @@ PARALLEL_FOR_LOOP void WilsonFermion::DhopInternalCommsOverlapCompute(StencilImpl & st,DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) { - assert((dag==DaggerNo) ||(dag==DaggerYes)); - - Compressor compressor(dag); - - auto handle = st.HaloExchangeBegin(in,compressor); - - bool local = true; - bool nonlocal = false; - if ( dag == DaggerYes ) { - if( HandOptDslash ) { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } else { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } - } else { - if( HandOptDslash ) { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } else { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } - } - - st.HaloExchangeComplete(handle); - - local = false; - nonlocal = true; - if ( dag == DaggerYes ) { - if( HandOptDslash ) { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } else { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } - } else { - if( HandOptDslash ) { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } else { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal); - } - } - } + assert(0); }; diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc 
b/lib/qcd/action/fermion/WilsonFermion5D.cc index d56c994c..b78f030e 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -281,11 +281,7 @@ void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) { - // if ( Impl::overlapCommsCompute () ) { - // DhopInternalCommsOverlapCompute(st,lo,U,in,out,dag); - // } else { DhopInternalCommsThenCompute(st,lo,U,in,out,dag); - // } } template @@ -368,7 +364,7 @@ PARALLEL_FOR_LOOP sU = lo.Reorder(sU); } sF = s+Ls*sU; - Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out,(uint64_t *)0);// &buf[0] + Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out); } } } @@ -428,130 +424,6 @@ void WilsonFermion5D::DhopInternalCommsOverlapCompute(StencilImpl & st, Le const FermionField &in, FermionField &out,int dag) { assert(0); - // assert((dag==DaggerNo) ||(dag==DaggerYes)); - alltime-=usecond(); - - Compressor compressor(dag); - - // Assume balanced KMP_AFFINITY; this is forced in GridThread.h - - int threads = GridThread::GetThreads(); - int HT = GridThread::GetHyperThreads(); - int cores = GridThread::GetCores(); - int nwork = U._grid->oSites(); - - commtime -=usecond(); - auto handle = st.HaloExchangeBegin(in,compressor); - commtime +=usecond(); - - // Dhop takes the 4d grid from U, and makes a 5d index for fermion - // Not loop ordering and data layout. - // Designed to create - // - per thread reuse in L1 cache for U - // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable. 
- bool local = true; - bool nonlocal = false; - dslashtime -=usecond(); - if ( dag == DaggerYes ) { - if( this->HandOptDslash ) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - for(int s=0;soSites();ss++){ - { - int sd; - for(sd=0;sdHandOptDslash ) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - for(int s=0;soSites();ss++){ - int sU=ss; - for(int s=0;sHandOptDslash ) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - for(int s=0;soSites();ss++){ - { - int sd; - for(sd=0;sdHandOptDslash ) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - for(int s=0;soSites();ss++){ - int sU=ss; - for(int s=0;s diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 219e1a1d..0690a99f 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -38,216 +38,177 @@ WilsonKernels::WilsonKernels(const ImplParams &p): Base(p) {}; template void WilsonKernels::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; + SiteHalfSpinor *chi_p; SiteHalfSpinor Uchi; SiteSpinor result; StencilEntry *SE; int ptype; - int num = 0; - - result=zero; - /////////////////////////// // Xp /////////////////////////// SE=st.GetEntry(ptype,Xp,sF); - if (local && SE->_is_local ) { + if (SE->_is_local ) { + chi_p = &chi; if ( SE->_permute ) { spProjXp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjXp(chi,in._odata[SE->_offset]); } - } - - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; + } else { + chi_p=&buf[SE->_offset]; } - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st); - accumReconXp(result,Uchi); - num++; - } + 
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); + spReconXp(result,Uchi); /////////////////////////// // Yp /////////////////////////// SE=st.GetEntry(ptype,Yp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjYp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjYp(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st); - accumReconYp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); + accumReconYp(result,Uchi); /////////////////////////// // Zp /////////////////////////// SE=st.GetEntry(ptype,Zp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjZp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjZp(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st); - accumReconZp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); + accumReconZp(result,Uchi); /////////////////////////// // Tp /////////////////////////// SE=st.GetEntry(ptype,Tp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjTp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjTp(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st); - accumReconTp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); + 
accumReconTp(result,Uchi); /////////////////////////// // Xm /////////////////////////// SE=st.GetEntry(ptype,Xm,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjXm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjXm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st); - accumReconXm(result,Uchi); - num++; - } - + Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); + accumReconXm(result,Uchi); + /////////////////////////// // Ym /////////////////////////// SE=st.GetEntry(ptype,Ym,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjYm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjYm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st); - accumReconYm(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); + accumReconYm(result,Uchi); /////////////////////////// // Zm /////////////////////////// SE=st.GetEntry(ptype,Zm,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjZm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjZm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st); - accumReconZm(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); + accumReconZm(result,Uchi); 
/////////////////////////// // Tm /////////////////////////// SE=st.GetEntry(ptype,Tm,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjTm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjTm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); + accumReconTm(result,Uchi); - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st); - accumReconTm(result,Uchi); - num++; - } - - if ( local ) { - vstream(out._odata[sF],result); - } else if ( num ) { - vstream(out._odata[sF],out._odata[sF]+result); - } + vstream(out._odata[sF],result); }; @@ -255,216 +216,177 @@ void WilsonKernels::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField template void WilsonKernels::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; + SiteHalfSpinor *chi_p; SiteHalfSpinor Uchi; SiteSpinor result; StencilEntry *SE; int ptype; - int num = 0; - - result=zero; - /////////////////////////// // Xp /////////////////////////// SE=st.GetEntry(ptype,Xm,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjXp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjXp(chi,in._odata[SE->_offset]); } - } - - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; + } else { + chi_p=&buf[SE->_offset]; } - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st); - accumReconXp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); + spReconXp(result,Uchi); 
/////////////////////////// // Yp /////////////////////////// SE=st.GetEntry(ptype,Ym,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjYp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjYp(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st); - accumReconYp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); + accumReconYp(result,Uchi); /////////////////////////// // Zp /////////////////////////// SE=st.GetEntry(ptype,Zm,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjZp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjZp(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st); - accumReconZp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); + accumReconZp(result,Uchi); /////////////////////////// // Tp /////////////////////////// SE=st.GetEntry(ptype,Tm,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjTp(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjTp(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st); - accumReconTp(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); + accumReconTp(result,Uchi); /////////////////////////// // Xm 
/////////////////////////// SE=st.GetEntry(ptype,Xp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjXm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjXm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st); - accumReconXm(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); + accumReconXm(result,Uchi); /////////////////////////// // Ym /////////////////////////// SE=st.GetEntry(ptype,Yp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjYm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjYm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st); - accumReconYm(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); + accumReconYm(result,Uchi); /////////////////////////// // Zm /////////////////////////// SE=st.GetEntry(ptype,Zp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjZm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjZm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } - - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st); - accumReconZm(result,Uchi); - num++; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); + accumReconZm(result,Uchi); /////////////////////////// // Tm /////////////////////////// 
SE=st.GetEntry(ptype,Tp,sF); - if (local && SE->_is_local ) { + if ( SE->_is_local ) { + chi_p = χ if ( SE->_permute ) { spProjTm(tmp,in._odata[SE->_offset]); permute(chi,tmp,ptype); } else { spProjTm(chi,in._odata[SE->_offset]); } + } else { + chi_p=&buf[SE->_offset]; } - if ( nonlocal && (!SE->_is_local) ) { - chi=buf[SE->_offset]; - } + Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); + accumReconTm(result,Uchi); - if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) { - Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st); - accumReconTm(result,Uchi); - num++; - } - - if ( local ) { - vstream(out._odata[sF],result); - } else if ( num ) { - vstream(out._odata[sF],out._odata[sF]+result); - } + vstream(out._odata[sF],result); }; template @@ -596,7 +518,7 @@ void WilsonKernels::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, vstream(out._odata[sF],result); } -#if ( ! defined(IMCI) ) +#if ( ! defined(IMCI) && ! defined(AVX512) ) template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index b7698730..68ae4c9e 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -48,11 +48,11 @@ namespace Grid { public: void DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); + int sF,int sU,const FermionField &in, FermionField &out); void DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in,FermionField &out,bool local= true, bool nonlocal=true); + int sF,int sU,const FermionField &in,FermionField &out); void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, @@ -60,15 +60,15 @@ namespace Grid { void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - 
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); + int sF,int sU,const FermionField &in, FermionField &out); int DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); + int sF,int sU,const FermionField &in, FermionField &out); int DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); + int sF,int sU,const FermionField &in, FermionField &out); WilsonKernels(const ImplParams &p= ImplParams()); diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index e5174484..700eaec8 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -27,8 +27,8 @@ Author: paboyle *************************************************************************************/ /* END LEGAL */ #include -//#if defined(AVX512) || defined (IMCI) -#if defined (IMCI) +#if defined(AVX512) || defined (IMCI) +//#if defined (IMCI) #include @@ -106,7 +106,7 @@ namespace QCD { template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out,uint64_t *timers) + int ss,int sU,const FermionField &in, FermionField &out) { uint64_t now; uint64_t first ; @@ -341,6 +341,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField template class WilsonKernels; template class WilsonKernels; - + template class WilsonKernels; + template class WilsonKernels; }} #endif diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 5c6eee00..74440f16 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -308,548 +308,11 @@ 
Author: paboyle namespace Grid { namespace QCD { -#if 0 -template -int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) -{ - // std::cout << "Hand op Dhop "<_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - XP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Xp); - XP_RECON_ACCUM; - num++; - } - - // Yp - SE=st.GetEntry(ptype,Yp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - YP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Yp); - YP_RECON_ACCUM; - num++; - } - - - // Zp - SE=st.GetEntry(ptype,Zp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - ZP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Zp); - ZP_RECON_ACCUM; - num++; - } - - // Tp - SE=st.GetEntry(ptype,Tp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - TP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Tp); - TP_RECON_ACCUM; - num++; - } - - // Xm - SE=st.GetEntry(ptype,Xm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - XM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Xm); - XM_RECON_ACCUM; - num++; - } - - // Ym - SE=st.GetEntry(ptype,Ym,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - YM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Ym); - YM_RECON_ACCUM; - num++; - } - - // Zm - SE=st.GetEntry(ptype,Zm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - ZM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Zm); - ZM_RECON_ACCUM; - num++; - } - - // Tm - SE=st.GetEntry(ptype,Tm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - TM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Tm); - TM_RECON_ACCUM; - num++; - } - - SiteSpinor & ref (out._odata[ss]); - if ( Local ) { - vstream(ref()(0)(0),result_00); - vstream(ref()(0)(1),result_01); - vstream(ref()(0)(2),result_02); - vstream(ref()(1)(0),result_10); - vstream(ref()(1)(1),result_11); - vstream(ref()(1)(2),result_12); - vstream(ref()(2)(0),result_20); - vstream(ref()(2)(1),result_21); - vstream(ref()(2)(2),result_22); - vstream(ref()(3)(0),result_30); - vstream(ref()(3)(1),result_31); - vstream(ref()(3)(2),result_32); - return 1; - } else if ( num ) { - vstream(ref()(0)(0),ref()(0)(0)+result_00); - vstream(ref()(0)(1),ref()(0)(1)+result_01); - vstream(ref()(0)(2),ref()(0)(2)+result_02); - vstream(ref()(1)(0),ref()(1)(0)+result_10); - vstream(ref()(1)(1),ref()(1)(1)+result_11); - vstream(ref()(1)(2),ref()(1)(2)+result_12); - vstream(ref()(2)(0),ref()(2)(0)+result_20); - vstream(ref()(2)(1),ref()(2)(1)+result_21); - vstream(ref()(2)(2),ref()(2)(2)+result_22); - vstream(ref()(3)(0),ref()(3)(0)+result_30); - vstream(ref()(3)(1),ref()(3)(1)+result_31); - vstream(ref()(3)(2),ref()(3)(2)+result_32); - return 1; - } - return 0; -} - - - - -template -int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) -{ - // std::cout << "Hand op Dhop "<_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - XM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Xp); - XM_RECON_ACCUM; - num++; - } - - - // Yp - SE=st.GetEntry(ptype,Yp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - YM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Yp); - YM_RECON_ACCUM; - num++; - } - - - // Zp - SE=st.GetEntry(ptype,Zp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - ZM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Zp); - ZM_RECON_ACCUM; - num++; - } - - // Tp - SE=st.GetEntry(ptype,Tp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - TM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Tp); - TM_RECON_ACCUM; - num++; - } - - // Xm - SE=st.GetEntry(ptype,Xm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - XP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Xm); - XP_RECON_ACCUM; - num++; - } - - // Ym - SE=st.GetEntry(ptype,Ym,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - YP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Ym); - YP_RECON_ACCUM; - num++; - } - - // Zm - SE=st.GetEntry(ptype,Zm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - ZP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Zm); - ZP_RECON_ACCUM; - num++; - } - - // Tm - SE=st.GetEntry(ptype,Tm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - TP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
- } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Tm); - TP_RECON_ACCUM; - num++; - } - - SiteSpinor & ref (out._odata[ss]); - if ( Local ) { - vstream(ref()(0)(0),result_00); - vstream(ref()(0)(1),result_01); - vstream(ref()(0)(2),result_02); - vstream(ref()(1)(0),result_10); - vstream(ref()(1)(1),result_11); - vstream(ref()(1)(2),result_12); - vstream(ref()(2)(0),result_20); - vstream(ref()(2)(1),result_21); - vstream(ref()(2)(2),result_22); - vstream(ref()(3)(0),result_30); - vstream(ref()(3)(1),result_31); - vstream(ref()(3)(2),result_32); - return 1; - } else if ( num ) { - vstream(ref()(0)(0),ref()(0)(0)+result_00); - vstream(ref()(0)(1),ref()(0)(1)+result_01); - vstream(ref()(0)(2),ref()(0)(2)+result_02); - vstream(ref()(1)(0),ref()(1)(0)+result_10); - vstream(ref()(1)(1),ref()(1)(1)+result_11); - vstream(ref()(1)(2),ref()(1)(2)+result_12); - vstream(ref()(2)(0),ref()(2)(0)+result_20); - vstream(ref()(2)(1),ref()(2)(1)+result_21); - vstream(ref()(2)(2),ref()(2)(2)+result_22); - vstream(ref()(3)(0),ref()(3)(0)+result_30); - vstream(ref()(3)(1),ref()(3)(1)+result_31); - vstream(ref()(3)(2),ref()(3)(2)+result_32); - return 1; - } - return 0; -} - -#else template int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) + int ss,int sU,const FermionField &in, FermionField &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -1094,7 +557,7 @@ int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField template int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out,bool l, bool nl) + int ss,int sU,const FermionField &in, FermionField &out) { // std::cout << "Hand op Dhop 
"<::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi } -#endif //////////////////////////////////////////////// // Specialise Gparity to simple implementation //////////////////////////////////////////////// template<> int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { DiracOptDhopSite(st,U,buf,sF,sU,in,out); // returns void, will template override for Wilson Nc=3 //check consistency of return types between these functions and the ones in WilsonKernels.cc @@ -1355,7 +817,7 @@ int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,Doub template<> int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 return 0; @@ -1364,7 +826,7 @@ int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,D template<> int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 return 0; @@ -1373,7 +835,7 @@ int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,Doub template<> int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 return 0; @@ -1383,29 +845,29 @@ int 
WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,D template int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out,bool l,bool n); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl); + int ss,int sU,const FermionField &in, FermionField &out); template int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl); + int ss,int sU,const FermionField &in, 
FermionField &out); }} diff --git a/lib/simd/Avx512Asm.h b/lib/simd/Avx512Asm.h index c0569c1f..cf15e1c1 100644 --- a/lib/simd/Avx512Asm.h +++ b/lib/simd/Avx512Asm.h @@ -97,16 +97,26 @@ Author: paboyle // CONFIG IMCI/AVX512 ////////////////////////////////////////////////////////////////////////////////////////// +#ifdef IMCI #define ASM_IMCI -#undef ASM_AVX512 +#define MASK_REGS \ + __asm__ ("mov $0xAAAA, %%eax \n"\ + "kmov %%eax, %%k6 \n"\ + "knot %%k6, %%k7 \n" : : : "%eax"); + +#endif +#ifdef AVX512 +#define ASM_AVX512 +#define MASK_REGS \ + __asm__ ("mov $0xAAAA, %%eax \n"\ + "kmovw %%eax, %%k6 \n"\ + "mov $0x5555, %%eax \n"\ + "kmovw %%eax, %%k7 \n" : : : "%eax"); +#endif //////////////////////////////////////////////////////////////////////////////////////////////////// // Opcodes common to AVX512 and IMCI //////////////////////////////////////////////////////////////////////////////////////////////////// -#define MASK_REGS \ - __asm__ ("mov $0xAAAA, %%eax \n"\ - "kmov %%eax, %%k6 \n"\ - "knot %%k6, %%k7 \n" : : : "%eax"); #define VZEROf(A) "vpxorq " #A "," #A "," #A ";\n" #define VZEROd(A) "vpxorq " #A "," #A "," #A ";\n" @@ -137,8 +147,14 @@ Author: paboyle VACCTIMESI2f(A,ACC,tmp) #define VACCTIMESI1MEMf(A,ACC,O,P) "vaddps " #O"*64("#P"),"#A "," #ACC"{%k7}" ";\n" +#ifdef ASM_IMCI #define VACCTIMESI2MEMf(A,ACC,O,P) "vsubrps " #O"*64("#P"),"#A "," #ACC"{%k6}" ";\n" #define VACCTIMESMINUSI1MEMf(A,ACC,O,P) "vsubrps " #O"*64("#P"),"#A "," #ACC"{%k7}" ";\n" +#endif +#ifdef ASM_AVX512 +#define VACCTIMESI2MEMf(A,ACC,O,P) "vsubps " #O"*64("#P"),"#A "," #ACC"{%k6}" ";\n" // FIXME KNOWN BUG INTRODUCED TO FORCE COMPILE CLEAN +#define VACCTIMESMINUSI1MEMf(A,ACC,O,P) "vsubps " #O"*64("#P"),"#A "," #ACC"{%k7}" ";\n" +#endif #define VACCTIMESMINUSI2MEMf(A,ACC,O,P) "vaddps " #O"*64("#P"),"#A "," #ACC"{%k6}" ";\n" #define VACCTIMESId(A,ACC,tmp) \ @@ -163,8 +179,14 @@ Author: paboyle #define VMOVd(A,DEST) "vmovapd " #A ", " #DEST ";\n" // Field prefetch +#ifdef ASM_IMCI 
#define VPREFETCHNTA(O,A) "vprefetchnta "#O"*64("#A");\n" "vprefetch1 ("#O"+12)*64("#A");\n" #define VPREFETCH(O,A) "vprefetch0 "#O"*64("#A");\n" "vprefetch1 ("#O"+12)*64("#A");\n" +#endif +#ifdef ASM_AVX512 +#define VPREFETCHNTA(O,A) +#define VPREFETCH(O,A) +#endif #define VPREFETCHG(O,A) #define VPREFETCHW(O,A) //"vprefetche0 "#O"*64("#A");\n" "vprefetche1 ("#O"+12)*64("#A");\n" @@ -251,11 +273,11 @@ Author: paboyle #define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n" #define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n" // Swaps Re/Im -#define VSHUFd(A,DEST) "vshufpd $0x5, " #A "," #A "," #DEST ";\n" -#define VSHUFf(A,DEST) "vshufps $0x55," #A "," #A "," #DEST ";\n" +#define VSHUFd(A,DEST) "vshufpd $0x55," #A "," #A "," #DEST ";\n" +#define VSHUFf(A,DEST) "vshufps $0x4e," #A "," #A "," #DEST ";\n" // Memops are useful for optimisation -#define VSHUFMEMd(OFF,A,DEST) "vpshufpd $0x4e, " #OFF"("#A ")," #DEST ";\n" -#define VSHUFMEMf(OFF,A,DEST) "vpshufps $0xb1, " #OFF"("#A ")," #DEST ";\n" +#define VSHUFMEMd(OFF,A,DEST) "vpshufd $0x4e, " #OFF"("#A ")," #DEST ";\n" +#define VSHUFMEMf(OFF,A,DEST) "vpshufd $0xb1, " #OFF"("#A ")," #DEST ";\n" // Merges accumulation for complex dot chain @@ -271,7 +293,7 @@ Author: paboyle #define ZEND2f(Criir,Ciirr, tmp) "vsubps " #Ciirr "," #tmp "," #Criir"{%k7}" ";\n" -#define ZEND2d(Criir,Ciirr, tmp) \ +#define ZEND1d(Criir,Ciirr, tmp) \ "vshufpd $0x33," #Ciirr "," #Criir "," #tmp ";\n"\ "vaddpd " #Criir "," #tmp "," #Criir"{%k6}" ";\n" #define ZEND2d(Criir,Ciirr, tmp) "vsubpd " #Ciirr "," #tmp "," #Criir"{%k7}" ";\n" @@ -311,14 +333,41 @@ Author: paboyle #define VACCTIMESI1d(A,ACC,tmp) "vaddpd " #tmp "," #ACC "," #ACC"{%k7}" ";\n" #define VACCTIMESI2d(A,ACC,tmp) "vsubpd " #tmp "," #ACC "," #ACC"{%k6}" ";\n" -#define VPERM0f(A,B) "vshuff32x4 " #A "," #B "," "#B" ", " #_MM_SELECT_FOUR_FOUR(1,0,3,2) ";\n" -#define VPERM1f(A,B) "vshuff32x4 " #A "," #B "," "#B" ", " 
#_MM_SELECT_FOUR_FOUR(2,3,0,1) ";\n" -#define VPERM2f(A,B) "vshufps " #A "," #B "," "#B" ", " #_MM_SELECT_FOUR_FOUR(1,0,3,2) ";\n" -#define VPERM3f(A,B) "vshufps " #A "," #B "," "#B" ", " #_MM_SELECT_FOUR_FOUR(2,3,0,1) ";\n" + static inline __m512 Permute0(__m512 in){ + return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2)); + }; + static inline __m512 Permute1(__m512 in){ + return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); + }; + static inline __m512 Permute2(__m512 in){ + return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2)); + }; + static inline __m512 Permute3(__m512 in){ + return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); + }; -#define VPERM0d(A,B) "vshuff64x2 " #A "," #B "," "#B" ", " #_MM_SELECT_FOUR_FOUR(1,0,3,2) ";\n" -#define VPERM1d(A,B) "vshuff64x2 " #A "," #B "," "#B" ", " #_MM_SELECT_FOUR_FOUR(2,3,0,1) ";\n" -#define VPERM2d(A,B) "vshufpd " #A "," #B "," "#B" ", " 0x55 ";\n" + static inline __m512d Permute0(__m512d in){ + return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2)); + }; + static inline __m512d Permute1(__m512d in){ + return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); + }; + static inline __m512d Permute2(__m512d in){ + return _mm512_shuffle_pd(in,in,0x55); + }; + static inline __m512d Permute3(__m512d in){ + return in; + }; + + +#define VPERM0f(A,B) "vshuff32x4 $0x4e," #A "," #B "," #B ";\n" +#define VPERM1f(A,B) "vshuff32x4 $0xb1," #A "," #B "," #B ";\n" +#define VPERM2f(A,B) "vshufps $0x4e," #A "," #B "," #B ";\n" +#define VPERM3f(A,B) "vshufps $0xb1," #A "," #B "," #B ";\n" + +#define VPERM0d(A,B) "vshuff64x2 $0x4e," #A "," #B "," #B ";\n" +#define VPERM1d(A,B) "vshuff64x2 $0xb1," #A "," #B "," #B ";\n" +#define VPERM2d(A,B) "vshufpd $0x55," #A "," #B "," #B ";\n" #define VPERM3d(A,B) VMOVd(A,B) #endif diff --git a/tests/Test_zmm.cc b/tests/Test_zmm.cc index 2c9cb446..94fc45b6 100644 --- a/tests/Test_zmm.cc +++ b/tests/Test_zmm.cc @@ -207,9 +207,9 @@ int main(int 
argc,char **argv) void TimesIAvx512(void *ptr1,void *ptr3) { __asm__ ("mov $0xAAAA, %%eax " : : :"%eax"); - __asm__ ("kmov %%eax, %%k6 " : : :); - __asm__ ("knot %%k6, %%k7 " : : :); - + __asm__ ("kmovw %%eax, %%k6 " : : :); + __asm__ ("mov $0x5555, %%eax " : : :"%eax"); + __asm__ ("kmovw %%eax, %%k7 " : : :); MASK_REGS; From 21abaf7e915d52c1c2793c755d3145cd19b88b07 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 28 Mar 2016 00:35:45 -0600 Subject: [PATCH 3/7] Gamma sign change --- lib/qcd/action/fermion/WilsonKernelsAsm.cc | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 700eaec8..3dab979b 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -159,7 +159,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - XM_PROJMEM(&plocal[offset]); + XP_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR3; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } @@ -169,7 +169,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFXM(Xm,pf); } - XM_RECON; + XP_RECON; // Ym offset = SE->_offset; @@ -182,7 +182,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - YM_PROJMEM(&plocal[offset]); + YP_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR2; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
} @@ -192,7 +192,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFYM(Ym,pf); } - YM_RECON_ACCUM; + YP_RECON_ACCUM; // Zm offset = SE->_offset; @@ -205,7 +205,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - ZM_PROJMEM(&plocal[offset]); + ZP_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR1; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } @@ -215,7 +215,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFZM(Zm,pf); } - ZM_RECON_ACCUM; + ZP_RECON_ACCUM; // Tm offset = SE->_offset; @@ -228,7 +228,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField if ( local ) { - TM_PROJMEM(&plocal[offset]); + TP_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR0; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } @@ -238,7 +238,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFTM(Tm,pf); } - TM_RECON_ACCUM; + TP_RECON_ACCUM; // Tp offset = SE->_offset; @@ -251,7 +251,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - TP_PROJMEM(&plocal[offset]); + TM_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR0; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } @@ -261,7 +261,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFTP(Tp,pf); } - TP_RECON_ACCUM; + TM_RECON_ACCUM; // Zp offset = SE->_offset; @@ -274,7 +274,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - ZP_PROJMEM(&plocal[offset]); + ZM_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR1; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
} @@ -284,7 +284,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFZP(Zp,pf); } - ZP_RECON_ACCUM; + ZM_RECON_ACCUM; offset = SE->_offset; @@ -297,7 +297,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - YP_PROJMEM(&plocal[offset]); + YM_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR2; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } @@ -307,7 +307,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFYP(Yp,pf); } - YP_RECON_ACCUM; + YM_RECON_ACCUM; // Xp perm = SE->_permute; @@ -322,7 +322,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField else pf=(void *)&pbuf[SE->_offset]; if ( local ) { - XP_PROJMEM(&plocal[offset]); + XM_PROJMEM(&plocal[offset]); if ( perm) { PERMUTE_DIR3; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } @@ -332,7 +332,7 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField { MULT_2SPIN_DIR_PFXP(Xp,pf); } - XP_RECON_ACCUM; + XM_RECON_ACCUM; debug: SAVE_RESULT(&out._odata[ss]); From 02198ac5b59833b7fbd1f811db6336564d07b4e7 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 28 Mar 2016 00:36:17 -0600 Subject: [PATCH 4/7] Tolerance and more coverage --- tests/Test_simd.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index 2d74ba9b..d5025554 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -145,7 +145,7 @@ void Tester(const functor &func) int ok=0; for(int i=0;i0){ + if ( abs(reference[i]-result[i])>1.0e-7){ std::cout< Date: Mon, 28 Mar 2016 00:37:12 -0600 Subject: [PATCH 5/7] AVX512 shaken out under SDE --- tests/Test_zmm.cc | 233 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 225 insertions(+), 8 deletions(-) diff --git a/tests/Test_zmm.cc b/tests/Test_zmm.cc index 94fc45b6..63476d5c 100644 --- 
a/tests/Test_zmm.cc +++ b/tests/Test_zmm.cc @@ -32,10 +32,15 @@ Author: paboyle using namespace Grid; using namespace Grid::QCD; + +void ZmulF(void *ptr1,void *ptr2,void *ptr3); +void Zmul(void *ptr1,void *ptr2,void *ptr3); void WilsonDslashAvx512(void *ptr1,void *ptr2,void *ptr3); void WilsonDslashAvx512F(void *ptr1,void *ptr2,void *ptr3); void TimesIAvx512F(void *ptr1,void *ptr3); void TimesIAvx512(void *ptr1,void *ptr3); +void TimesMinusIAvx512F(void *ptr1,void *ptr3); +void TimesMinusIAvx512(void *ptr1,void *ptr3); @@ -63,50 +68,106 @@ int main(int argc,char **argv) vColourMatrixD mat; vHalfSpinColourVectorD vec; + vHalfSpinColourVectorD vec1; + vHalfSpinColourVectorD vec2; + vHalfSpinColourVectorD vec3; + vHalfSpinColourVectorD matvec; vHalfSpinColourVectorD ref; vComplexD err; + random(sRNG,vec1); + vec1 = std::complex(0.1,3.0); + random(sRNG,vec2); + vec2=2.0; + random(sRNG,vec3); + + //std::cout << "Zmul vec1"< U(4,UGrid); for(int mu=0;mu(Umu,mu); @@ -157,7 +219,7 @@ int main(int argc,char **argv) } t1=usecond(); - +#if 1 for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ Dw.DhopOE(srce,resulta,0); PerformanceCounter Counter(i); @@ -166,14 +228,28 @@ int main(int argc,char **argv) Counter.Stop(); Counter.Report(); } - resulta = (-0.5) * resulta; +#endif + //resulta = (-0.5) * resulta; std::cout< Date: Mon, 28 Mar 2016 00:38:05 -0600 Subject: [PATCH 6/7] AVX512 shaken out --- lib/simd/Avx512Asm.h | 402 +++++++++++++---------------------------- lib/simd/Grid_avx512.h | 20 +- 2 files changed, 141 insertions(+), 281 deletions(-) diff --git a/lib/simd/Avx512Asm.h b/lib/simd/Avx512Asm.h index cf15e1c1..8363c2ab 100644 --- a/lib/simd/Avx512Asm.h +++ b/lib/simd/Avx512Asm.h @@ -69,6 +69,7 @@ Author: paboyle #define UChi_12 %zmm23 #define Uir %zmm24 +//#define ONE %zmm24 #define Uri %zmm25 #define Z0 %zmm26 @@ -99,23 +100,14 @@ Author: paboyle #ifdef IMCI #define ASM_IMCI -#define MASK_REGS \ - __asm__ ("mov $0xAAAA, %%eax \n"\ - "kmov %%eax, %%k6 \n"\ - "knot 
%%k6, %%k7 \n" : : : "%eax"); - #endif + #ifdef AVX512 #define ASM_AVX512 -#define MASK_REGS \ - __asm__ ("mov $0xAAAA, %%eax \n"\ - "kmovw %%eax, %%k6 \n"\ - "mov $0x5555, %%eax \n"\ - "kmovw %%eax, %%k7 \n" : : : "%eax"); #endif //////////////////////////////////////////////////////////////////////////////////////////////////// -// Opcodes common to AVX512 and IMCI +// Opcodes common //////////////////////////////////////////////////////////////////////////////////////////////////// #define VZEROf(A) "vpxorq " #A "," #A "," #A ";\n" @@ -146,17 +138,6 @@ Author: paboyle VACCTIMESI1f(A,ACC,tmp) \ VACCTIMESI2f(A,ACC,tmp) -#define VACCTIMESI1MEMf(A,ACC,O,P) "vaddps " #O"*64("#P"),"#A "," #ACC"{%k7}" ";\n" -#ifdef ASM_IMCI -#define VACCTIMESI2MEMf(A,ACC,O,P) "vsubrps " #O"*64("#P"),"#A "," #ACC"{%k6}" ";\n" -#define VACCTIMESMINUSI1MEMf(A,ACC,O,P) "vsubrps " #O"*64("#P"),"#A "," #ACC"{%k7}" ";\n" -#endif -#ifdef ASM_AVX512 -#define VACCTIMESI2MEMf(A,ACC,O,P) "vsubps " #O"*64("#P"),"#A "," #ACC"{%k6}" ";\n" // FIXME KNOWN BUG INTRODUCED TO FORCE COMPILE CLEAN -#define VACCTIMESMINUSI1MEMf(A,ACC,O,P) "vsubps " #O"*64("#P"),"#A "," #ACC"{%k7}" ";\n" -#endif -#define VACCTIMESMINUSI2MEMf(A,ACC,O,P) "vaddps " #O"*64("#P"),"#A "," #ACC"{%k6}" ";\n" - #define VACCTIMESId(A,ACC,tmp) \ VACCTIMESI0d(A,ACC,tmp) \ VACCTIMESI1d(A,ACC,tmp) \ @@ -173,20 +154,12 @@ Author: paboyle VACCTIMESMINUSI2d(A,ACC,tmp) #define LOAD64i(A,ptr) __asm__ ( "movq %0, %" #A : : "r"(ptr) : #A ); -#define LOAD64(A,ptr) LOAD64i(A,ptr) +#define LOAD64(A,ptr) LOAD64i(A,ptr) #define VMOVf(A,DEST) "vmovaps " #A ", " #DEST ";\n" #define VMOVd(A,DEST) "vmovapd " #A ", " #DEST ";\n" -// Field prefetch -#ifdef ASM_IMCI -#define VPREFETCHNTA(O,A) "vprefetchnta "#O"*64("#A");\n" "vprefetch1 ("#O"+12)*64("#A");\n" -#define VPREFETCH(O,A) "vprefetch0 "#O"*64("#A");\n" "vprefetch1 ("#O"+12)*64("#A");\n" -#endif -#ifdef ASM_AVX512 -#define VPREFETCHNTA(O,A) -#define VPREFETCH(O,A) -#endif + #define VPREFETCHG(O,A) 
#define VPREFETCHW(O,A) //"vprefetche0 "#O"*64("#A");\n" "vprefetche1 ("#O"+12)*64("#A");\n" @@ -232,8 +205,6 @@ Author: paboyle #define ZENDf(Criir,Ciirr, tmp) ZEND1f(Criir,Ciirr, tmp) ZEND2f(Criir,Ciirr, tmp) #define ZENDd(Criir,Ciirr, tmp) ZEND1d(Criir,Ciirr, tmp) ZEND2d(Criir,Ciirr, tmp) -// Need VSHUFMULMEMf,d for KNC -// AVX512 friendly #define ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\ VSHUFMEMf(O,P,tmp) \ VMULMEMf(O,P,B,Biirr) \ @@ -265,100 +236,84 @@ Author: paboyle VMADDd(tmp,C,Criir) //////////////////////////////////////////////////////////////////////////////////////////////////// -// Lane swizzling changed between AVX512 and IMCI and requires arch dependent complex support +// ISA changed between AVX512 and IMCI and requires arch dependent complex support //////////////////////////////////////////////////////////////////////////////////////////////////// -// AVX512 special (Knights Landing) +#define VPREFETCHNTA(O,A) +#define VPREFETCH(O,A) + +#define VSTOREf(OFF,PTR,SRC) "vmovaps " #SRC "," #OFF "*64(" #PTR ")" ";\n" +#define VSTOREd(OFF,PTR,SRC) "vmovapd " #SRC "," #OFF "*64(" #PTR ")" ";\n" + +// Swaps Re/Im ; could unify this with IMCI +#define VSHUFd(A,DEST) "vpshufd $0x4e," #A "," #DEST ";\n" +#define VSHUFf(A,DEST) "vpshufd $0xb1," #A "," #DEST ";\n" +#define VSHUFMEMd(OFF,A,DEST) "vpshufd $0x4e, " #OFF"*64("#A ")," #DEST ";\n" // 32 bit level: 1,0,3,2 +#define VSHUFMEMf(OFF,A,DEST) "vpshufd $0xb1, " #OFF"*64("#A ")," #DEST ";\n" // 32 bit level: 2,3,0,1 + + +//////////////////////////////////////////////////////////// +// Knights Landing specials +//////////////////////////////////////////////////////////// #ifdef ASM_AVX512 -#define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n" -#define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n" -// Swaps Re/Im -#define VSHUFd(A,DEST) "vshufpd $0x55," #A "," #A "," #DEST ";\n" -#define VSHUFf(A,DEST) "vshufps $0x4e," #A "," #A "," #DEST ";\n" -// Memops 
are useful for optimisation -#define VSHUFMEMd(OFF,A,DEST) "vpshufd $0x4e, " #OFF"("#A ")," #DEST ";\n" -#define VSHUFMEMf(OFF,A,DEST) "vpshufd $0xb1, " #OFF"("#A ")," #DEST ";\n" +#define MASK_REGS \ + __asm__ ("mov $0xAAAA, %%eax \n"\ + "kmovw %%eax, %%k6 \n"\ + "mov $0x5555, %%eax \n"\ + "kmovw %%eax, %%k7 \n" : : : "%eax"); -// Merges accumulation for complex dot chain -// TODO: 12 operation saving: -// # could SWIZ op 18{cdab} and eliminate temporary // 12cycles -// # no use KNL though. Fingour something else there. -// # All swizzles become perms ops, but gain addsub; subadd must use this -// # uint32_t (0x7F << 23 ) -// # uint64_t (0x3FF<< 52 ) ; vpbroadcast -#define ZEND1f(Criir,Ciirr, tmp) \ - "vshufps $0xb1," #Ciirr "," #Criir "," #tmp ";\n"\ - "vaddps " #Criir "," #tmp "," #Criir"{%k6}" ";\n" +// Merges accumulation for complex dot chain; less efficient under avx512 +//ZEND1d(Criir,Ciirr, tmp) "vaddpd " #Criir "{cdab} ," #Criir "," #Criir"{%k6}" ";\n" +//ZEND2d(Criir,Ciirr, tmp) "vsubpd " #Ciirr "{cdab} ," #Ciirr "," #Criir"{%k7}" ";\n" +//ZEND1f(Criir,Ciirr, tmp) "vaddps " #Criir "{cdab} ," #Criir "," #Criir"{%k6}" ";\n" +//ZEND2f(Criir,Ciirr, tmp) "vsubps " #Ciirr "{cdab} ," #Ciirr "," #Criir"{%k7}" ";\n" +#define ZEND1f(Criir,Ciirr, tmp) "vshufps $0xb1," #Criir "," #Criir "," #tmp ";\n"\ + "vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n" -#define ZEND2f(Criir,Ciirr, tmp) "vsubps " #Ciirr "," #tmp "," #Criir"{%k7}" ";\n" +#define ZEND2f(Criir,Ciirr, tmp) "vshufps $0xb1," #Ciirr "," #Ciirr "," #tmp ";\n"\ + "vsubps " #tmp "," #Ciirr "," #Criir"{%k7}" ";\n" -#define ZEND1d(Criir,Ciirr, tmp) \ - "vshufpd $0x33," #Ciirr "," #Criir "," #tmp ";\n"\ - "vaddpd " #Criir "," #tmp "," #Criir"{%k6}" ";\n" -#define ZEND2d(Criir,Ciirr, tmp) "vsubpd " #Ciirr "," #tmp "," #Criir"{%k7}" ";\n" +#define ZEND1d(Criir,Ciirr, tmp) "vshufpd $0x55," #Criir "," #Criir "," #tmp ";\n"\ + "vaddps " #tmp "," #Criir "," #Criir"{%k6}" ";\n" +#define ZEND2d(Criir,Ciirr, tmp) 
"vshufpd $0x55," #Ciirr "," #Ciirr "," #tmp ";\n"\ + "vsubpd " #tmp "," #Ciirr "," #Criir"{%k7};\n" // ri+ir ; ri+ir,rr-ii + // Further opt possible: KNC -- use swizzle operand ; no addsub. // KNL -- addsub. Saves 6 ops, 12 cycles; KNL cost of loading "1" as only fmaddsub // no swizzle on KNL. -#define VTIMESI0f(A,DEST, Z) VSHUFf(A,DEST) -#define VTIMESI1f(A,DEST, Z) "vaddps " #DEST "," #Z "," #DEST"{%k7}" ";\n" -#define VTIMESI2f(A,DEST, Z) "vsubps " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESI0f(A,DEST, Z) VSHUFf(A,DEST) +#define VTIMESI1f(A,DEST, Z) "vaddps " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESI2f(A,DEST, Z) "vsubps " #DEST "," #Z "," #DEST"{%k7}" ";\n" #define VTIMESI0d(A,DEST, Z) VSHUFd(A,DEST) -#define VTIMESI1d(A,DEST, Z) "vaddpd " #DEST "," #Z "," #DEST"{%k7}" ";\n" -#define VTIMESI2d(A,DEST, Z) "vsubpd " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESI1d(A,DEST, Z) "vaddpd " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESI2d(A,DEST, Z) "vsubpd " #DEST "," #Z "," #DEST"{%k7}" ";\n" #define VTIMESMINUSI0f(A,DEST,Z) VSHUFf(A,DEST) -#define VTIMESMINUSI1f(A,DEST,Z) "vsubps " #DEST "," #Z "," #DEST"{%k7}" ";\n" -#define VTIMESMINUSI2f(A,DEST,Z) "vaddps " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESMINUSI1f(A,DEST,Z) "vsubps " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESMINUSI2f(A,DEST,Z) "vaddps " #DEST "," #Z "," #DEST"{%k7}" ";\n" #define VTIMESMINUSI0d(A,DEST,Z) VSHUFd(A,DEST) -#define VTIMESMINUSI1d(A,DEST,Z) "vsubpd " #DEST "," #Z "," #DEST"{%k7}" ";\n" -#define VTIMESMINUSI2d(A,DEST,Z) "vaddpd " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESMINUSI1d(A,DEST,Z) "vsubpd " #DEST "," #Z "," #DEST"{%k6}" ";\n" +#define VTIMESMINUSI2d(A,DEST,Z) "vaddpd " #DEST "," #Z "," #DEST"{%k7}" ";\n" #define VACCTIMESMINUSI0f(A,ACC,tmp) VSHUFf(A,tmp) -#define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #tmp "," #ACC "," #ACC"{%k7}" ";\n" -#define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #tmp "," #ACC "," #ACC"{%k6}" ";\n" 
+#define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #tmp "," #ACC "," #ACC"{%k6}" ";\n" +#define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #tmp "," #ACC "," #ACC"{%k7}" ";\n" #define VACCTIMESMINUSI0d(A,ACC,tmp) VSHUFd(A,tmp) -#define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #tmp "," #ACC "," #ACC"{%k7}" ";\n" -#define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #tmp "," #ACC "," #ACC"{%k6}" ";\n" +#define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #tmp "," #ACC "," #ACC"{%k6}" ";\n" +#define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #tmp "," #ACC "," #ACC"{%k7}" ";\n" -#define VACCTIMESI0f(A,ACC,tmp) VSHUFf(A,tmp) -#define VACCTIMESI1f(A,ACC,tmp) "vaddps " #tmp "," #ACC "," #ACC"{%k7}" ";\n" -#define VACCTIMESI2f(A,ACC,tmp) "vsubps " #tmp "," #ACC "," #ACC"{%k6}" ";\n" - -#define VACCTIMESI0d(A,ACC,tmp) VSHUFd(A,tmp) -#define VACCTIMESI1d(A,ACC,tmp) "vaddpd " #tmp "," #ACC "," #ACC"{%k7}" ";\n" -#define VACCTIMESI2d(A,ACC,tmp) "vsubpd " #tmp "," #ACC "," #ACC"{%k6}" ";\n" - - static inline __m512 Permute0(__m512 in){ - return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2)); - }; - static inline __m512 Permute1(__m512 in){ - return _mm512_shuffle_f32x4(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); - }; - static inline __m512 Permute2(__m512 in){ - return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2)); - }; - static inline __m512 Permute3(__m512 in){ - return _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); - }; - - static inline __m512d Permute0(__m512d in){ - return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(1,0,3,2)); - }; - static inline __m512d Permute1(__m512d in){ - return _mm512_shuffle_f64x2(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); - }; - static inline __m512d Permute2(__m512d in){ - return _mm512_shuffle_pd(in,in,0x55); - }; - static inline __m512d Permute3(__m512d in){ - return in; - }; +#define VACCTIMESI0f(A,ACC,tmp) VSHUFf(A,tmp) +#define VACCTIMESI1f(A,ACC,tmp) "vaddps " #tmp "," #ACC "," #ACC"{%k6}" ";\n" +#define VACCTIMESI2f(A,ACC,tmp) "vsubps " #tmp 
"," #ACC "," #ACC"{%k7}" ";\n" +#define VACCTIMESI0d(A,ACC,tmp) VSHUFd(A,tmp) +#define VACCTIMESI1d(A,ACC,tmp) "vaddpd " #tmp "," #ACC "," #ACC"{%k6}" ";\n" +#define VACCTIMESI2d(A,ACC,tmp) "vsubpd " #tmp "," #ACC "," #ACC"{%k7}" ";\n" #define VPERM0f(A,B) "vshuff32x4 $0x4e," #A "," #B "," #B ";\n" #define VPERM1f(A,B) "vshuff32x4 $0xb1," #A "," #B "," #B ";\n" @@ -372,19 +327,16 @@ Author: paboyle #endif +//////////////////////////////////////////////////////////// // Knights Corner specials +//////////////////////////////////////////////////////////// + #ifdef ASM_IMCI -#define VSTOREf(OFF,PTR,SRC) "vmovnrngoaps " #SRC "," #OFF "*64(" #PTR ")" ";\n" -#define VSTOREd(OFF,PTR,SRC) "vmovnrngoapd " #SRC "," #OFF "*64(" #PTR ")" ";\n" - //#define VSTOREf(OFF,PTR,SRC) "vmovaps " #SRC "," #OFF "*64(" #PTR ")" ";\n" - //#define VSTOREd(OFF,PTR,SRC) "vmovapd " #SRC "," #OFF "*64(" #PTR ")" ";\n" -#define VSHUFf(A,DEST) "vmovaps " #A "{cdab} , " #DEST ";\n" -#define VSHUFd(A,DEST) "vmovapd " #A "{cdab} , " #DEST ";\n" - -// Memops are useful for optimisation -#define VSHUFMEMd(OFF,A,DEST) "vpshufd $0x4e, " #OFF"*64("#A ")," #DEST ";\n" -#define VSHUFMEMf(OFF,A,DEST) "vpshufd $0xb1, " #OFF"*64("#A ")," #DEST ";\n" +#define MASK_REGS \ + __asm__ ("mov $0xAAAA, %%eax \n"\ + "kmov %%eax, %%k6 \n"\ + "knot %%k6, %%k7 \n" : : : "%eax"); #define ZEND1d(Criir,Ciirr, tmp) "vaddpd " #Criir "{cdab} ," #Criir "," #Criir"{%k6}" ";\n" #define ZEND2d(Criir,Ciirr, tmp) "vsubpd " #Ciirr "{cdab} ," #Ciirr "," #Criir"{%k7}" ";\n" @@ -423,12 +375,11 @@ Author: paboyle #define VACCTIMESMINUSI1f(A,ACC,tmp) "vsubps " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n" #define VACCTIMESMINUSI2f(A,ACC,tmp) "vaddps " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n" + // Acc = Acc - i A #define VACCTIMESMINUSI0d(A,ACC,tmp) #define VACCTIMESMINUSI1d(A,ACC,tmp) "vsubpd " #A "{cdab}," #ACC "," #ACC"{%k7}" ";\n" #define VACCTIMESMINUSI2d(A,ACC,tmp) "vaddpd " #A "{cdab}," #ACC "," #ACC"{%k6}" ";\n" -//#define 
ZENDf(Criir,Ciirr, tmp) - //((1<<6)|(0<<4)|(3<<2)|(2)) == 0100,1110 = 0x4e //((2<<6)|(3<<4)|(0<<2)|(1)) == 1011,0001 = 0xb1 @@ -443,18 +394,15 @@ Author: paboyle #define VPERM3d(A,B) VMOVd(A,B) #endif - // const SiteSpinor * ptr = & in._odata[offset]; #define LOAD_CHIMU(PTR) LOAD_CHIMUi(PTR) -#define LOAD_CHI(PTR) LOAD_CHIi(PTR) +#define LOAD_CHI(PTR) LOAD64(%r8,PTR) __asm__ ( LOAD_CHIi ); #define SAVE_UCHI(PTR) SAVE_UCHIi(PTR) #define SAVE_CHI(PTR) SAVE_CHIi(PTR) #define SAVE_RESULT(PTR) SAVE_RESULTi(PTR) -#define LOAD_CHIMUi(PTR) \ - LOAD64(%r8,PTR) \ - __asm__ (\ +#define LOAD_CHIMUi \ LOAD_CHIMU01i \ LOAD_CHIMU23i ); @@ -486,16 +434,14 @@ Author: paboyle // const SiteHalfSpinor *ptr = &buf[offset]; -#define LOAD_CHIi(PTR) \ - LOAD64(%r8,PTR) \ - __asm__ ( \ +#define LOAD_CHIi \ VLOAD(0,%r8,Chi_00) \ VLOAD(1,%r8,Chi_01) \ VLOAD(2,%r8,Chi_02) \ VLOAD(3,%r8,Chi_10) \ VLOAD(4,%r8,Chi_11) \ - VLOAD(5,%r8,Chi_12) \ - ); + VLOAD(5,%r8,Chi_12) + #define SAVE_UCHIi(PTR) \ LOAD64(%r8,PTR) \ @@ -634,7 +580,6 @@ Author: paboyle ZEND2(UChi_12,Z5,Chi_12) ); #define MULT_2SPIN(ptr) MULT_2SPIN_PF(ptr,ptr,VPREFETCHG); - #define MULT_2SPIN_PFXM(ptr,pf) MULT_2SPIN_PF(ptr,pf,VPREFETCHNTA) #define MULT_2SPIN_PFYM(ptr,pf) MULT_2SPIN_PF(ptr,pf,VPREFETCHNTA) #define MULT_2SPIN_PFZM(ptr,pf) MULT_2SPIN_PF(ptr,pf,VPREFETCHNTA) @@ -716,56 +661,23 @@ Author: paboyle // hspin(0)=fspin(0)+timesI(fspin(3)); // hspin(1)=fspin(1)+timesI(fspin(2)); -//define VTIMESIf(A,DEST, Z) -// These don't work if DEST==Z. FIXME. 
-#define XP_PROJ __asm__ ( \ - VACCTIMESI(Chimu_30,Chi_00,Z0) \ - VACCTIMESI(Chimu_31,Chi_01,Z1) \ - VACCTIMESI(Chimu_32,Chi_02,Z2) \ - VACCTIMESI(Chimu_20,Chi_10,Z3) \ - VACCTIMESI(Chimu_21,Chi_11,Z4) \ - VACCTIMESI(Chimu_22,Chi_12,Z5) ); - #define XP_PROJMEM(PTR) \ LOAD64(%r8,PTR) \ __asm__ ( \ + LOAD_CHIi \ SHUF_CHIMU23i \ - VACCTIMESI1MEM(Chimu_30,Chi_00,0,%r8) \ - VACCTIMESI1MEM(Chimu_31,Chi_01,1,%r8) \ - VACCTIMESI1MEM(Chimu_32,Chi_02,2,%r8) \ - VACCTIMESI1MEM(Chimu_20,Chi_10,3,%r8) \ - VACCTIMESI1MEM(Chimu_21,Chi_11,4,%r8) \ - VACCTIMESI1MEM(Chimu_22,Chi_12,5,%r8) \ - VACCTIMESI2MEM(Chimu_30,Chi_00,0,%r8) \ - VACCTIMESI2MEM(Chimu_31,Chi_01,1,%r8) \ - VACCTIMESI2MEM(Chimu_32,Chi_02,2,%r8) \ - VACCTIMESI2MEM(Chimu_20,Chi_10,3,%r8) \ - VACCTIMESI2MEM(Chimu_21,Chi_11,4,%r8) \ - VACCTIMESI2MEM(Chimu_22,Chi_12,5,%r8) ); - - -#define YP_PROJ __asm__ ( \ - VSUB(Chimu_30,Chimu_00,Chi_00)\ - VSUB(Chimu_31,Chimu_01,Chi_01)\ - VSUB(Chimu_32,Chimu_02,Chi_02)\ - VADD(Chimu_10,Chimu_20,Chi_10)\ - VADD(Chimu_11,Chimu_21,Chi_11)\ - VADD(Chimu_12,Chimu_22,Chi_12) ); - -#define EVICT_SPINOR(reg) \ - VEVICT(0,reg) \ - VEVICT(1,reg) \ - VEVICT(2,reg) \ - VEVICT(3,reg) \ - VEVICT(4,reg) \ - VEVICT(5,reg) \ - VEVICT(6,reg) \ - VEVICT(7,reg) \ - VEVICT(8,reg) \ - VEVICT(9,reg) \ - VEVICT(9,reg) \ - VEVICT(10,reg) \ - VEVICT(11,reg) + VACCTIMESI1(Chi_00,Chi_00,Chimu_30) \ + VACCTIMESI1(Chi_01,Chi_01,Chimu_31) \ + VACCTIMESI1(Chi_02,Chi_02,Chimu_32) \ + VACCTIMESI1(Chi_10,Chi_10,Chimu_20) \ + VACCTIMESI1(Chi_11,Chi_11,Chimu_21) \ + VACCTIMESI1(Chi_12,Chi_12,Chimu_22) \ + VACCTIMESI2(Chi_00,Chi_00,Chimu_30) \ + VACCTIMESI2(Chi_01,Chi_01,Chimu_31) \ + VACCTIMESI2(Chi_02,Chi_02,Chimu_32) \ + VACCTIMESI2(Chi_10,Chi_10,Chimu_20) \ + VACCTIMESI2(Chi_11,Chi_11,Chimu_21) \ + VACCTIMESI2(Chi_12,Chi_12,Chimu_22) ); #define YP_PROJMEM(ptr) \ @@ -778,43 +690,24 @@ Author: paboyle VADDMEM(6,%r8,Chimu_10,Chi_10) \ VADDMEM(7,%r8,Chimu_11,Chi_11) \ VADDMEM(8,%r8,Chimu_12,Chi_12) ); - // 
EVICT_SPINOR(%r8) ); - -#define ZP_PROJ __asm__ ( \ - VACCTIMESI(Chimu_20,Chi_00,Z0) \ - VACCTIMESI(Chimu_21,Chi_01,Z1) \ - VACCTIMESI(Chimu_22,Chi_02,Z2) \ - VACCTIMESMINUSI(Chimu_30,Chi_10,Z3) \ - VACCTIMESMINUSI(Chimu_31,Chi_11,Z4) \ - VACCTIMESMINUSI(Chimu_32,Chi_12,Z5) ); #define ZP_PROJMEM(PTR) \ LOAD64(%r8,PTR) \ __asm__ ( \ + LOAD_CHIi \ SHUF_CHIMU23i \ - VACCTIMESI1MEM(Chimu_20,Chi_00,0,%r8) \ - VACCTIMESI1MEM(Chimu_21,Chi_01,1,%r8) \ - VACCTIMESI1MEM(Chimu_22,Chi_02,2,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_30,Chi_10,3,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_31,Chi_11,4,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_32,Chi_12,5,%r8) \ - VACCTIMESI2MEM(Chimu_20,Chi_00,0,%r8) \ - VACCTIMESI2MEM(Chimu_21,Chi_01,1,%r8) \ - VACCTIMESI2MEM(Chimu_22,Chi_02,2,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_30,Chi_10,3,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_31,Chi_11,4,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_32,Chi_12,5,%r8) \ - EVICT_SPINOR(%r8) ); - - - -#define TP_PROJ __asm__ ( \ - VADD(Chimu_00,Chimu_20,Chi_00) \ - VADD(Chimu_01,Chimu_21,Chi_01) \ - VADD(Chimu_02,Chimu_22,Chi_02) \ - VADD(Chimu_10,Chimu_30,Chi_10) \ - VADD(Chimu_11,Chimu_31,Chi_11) \ - VADD(Chimu_12,Chimu_32,Chi_12) ); + VACCTIMESI1(Chi_00,Chi_00,Chimu_20) \ + VACCTIMESI1(Chi_01,Chi_01,Chimu_21) \ + VACCTIMESI1(Chi_02,Chi_02,Chimu_22) \ + VACCTIMESMINUSI1(Chi_10,Chi_10,Chimu_30) \ + VACCTIMESMINUSI1(Chi_11,Chi_11,Chimu_31) \ + VACCTIMESMINUSI1(Chi_12,Chi_12,Chimu_32) \ + VACCTIMESI2(Chi_00,Chi_00,Chimu_20) \ + VACCTIMESI2(Chi_01,Chi_01,Chimu_21) \ + VACCTIMESI2(Chi_02,Chi_02,Chimu_22) \ + VACCTIMESMINUSI2(Chi_10,Chi_10,Chimu_30) \ + VACCTIMESMINUSI2(Chi_11,Chi_11,Chimu_31) \ + VACCTIMESMINUSI2(Chi_12,Chi_12,Chimu_32) ); #define TP_PROJMEM(ptr) \ @@ -826,44 +719,28 @@ Author: paboyle VADDMEM(8,%r8,Chimu_02,Chi_02) \ VADDMEM(9,%r8,Chimu_10,Chi_10) \ VADDMEM(10,%r8,Chimu_11,Chi_11) \ - VADDMEM(11,%r8,Chimu_12,Chi_12) \ - EVICT_SPINOR(%r8) ); - + VADDMEM(11,%r8,Chimu_12,Chi_12) ); // hspin(0)=fspin(0)-timesI(fspin(3)) // 
hspin(1)=fspin(1)-timesI(fspin(2)) -#define XM_PROJ __asm__ ( \ - VACCTIMESMINUSI(Chimu_30,Chi_00,Z0) \ - VACCTIMESMINUSI(Chimu_31,Chi_01,Z1) \ - VACCTIMESMINUSI(Chimu_32,Chi_02,Z2) \ - VACCTIMESMINUSI(Chimu_20,Chi_10,Z3) \ - VACCTIMESMINUSI(Chimu_21,Chi_11,Z4) \ - VACCTIMESMINUSI(Chimu_22,Chi_12,Z5) ); #define XM_PROJMEM(PTR) \ - LOAD64(%r8,PTR) \ + LOAD64(%r8,PTR)\ __asm__ ( \ SHUF_CHIMU23i \ - VACCTIMESMINUSI1MEM(Chimu_30,Chi_00,0,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_31,Chi_01,1,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_32,Chi_02,2,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_20,Chi_10,3,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_21,Chi_11,4,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_22,Chi_12,5,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_30,Chi_00,0,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_31,Chi_01,1,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_32,Chi_02,2,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_20,Chi_10,3,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_21,Chi_11,4,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_22,Chi_12,5,%r8) ); - -#define YM_PROJ __asm__ ( \ - VADD(Chimu_00,Chimu_30,Chi_00)\ - VADD(Chimu_01,Chimu_31,Chi_01)\ - VADD(Chimu_02,Chimu_32,Chi_02)\ - VSUB(Chimu_20,Chimu_10,Chi_10)\ - VSUB(Chimu_21,Chimu_11,Chi_11)\ - VSUB(Chimu_22,Chimu_12,Chi_12) ); + LOAD_CHIi \ + VACCTIMESMINUSI1(Chi_00,Chi_00,Chimu_30)\ + VACCTIMESMINUSI1(Chi_01,Chi_01,Chimu_31)\ + VACCTIMESMINUSI1(Chi_02,Chi_02,Chimu_32)\ + VACCTIMESMINUSI1(Chi_10,Chi_10,Chimu_20)\ + VACCTIMESMINUSI1(Chi_11,Chi_11,Chimu_21)\ + VACCTIMESMINUSI1(Chi_12,Chi_12,Chimu_22)\ + VACCTIMESMINUSI2(Chi_00,Chi_00,Chimu_30)\ + VACCTIMESMINUSI2(Chi_01,Chi_01,Chimu_31)\ + VACCTIMESMINUSI2(Chi_02,Chi_02,Chimu_32)\ + VACCTIMESMINUSI2(Chi_10,Chi_10,Chimu_20)\ + VACCTIMESMINUSI2(Chi_11,Chi_11,Chimu_21)\ + VACCTIMESMINUSI2(Chi_12,Chi_12,Chimu_22) ); #define YM_PROJMEM(ptr) \ LOAD64(%r8,ptr) \ @@ -874,45 +751,25 @@ Author: paboyle VADDMEM(11,%r8,Chimu_02,Chi_02) \ VSUBMEM(6,%r8,Chimu_10,Chi_10) \ VSUBMEM(7,%r8,Chimu_11,Chi_11) \ - VSUBMEM(8,%r8,Chimu_12,Chi_12) \ - EVICT_SPINOR(%r8) ); - - -#define 
ZM_PROJ __asm__ ( \ - VACCTIMESMINUSI(Chimu_20,Chi_00,Z0)\ - VACCTIMESMINUSI(Chimu_21,Chi_01,Z1)\ - VACCTIMESMINUSI(Chimu_22,Chi_02,Z2)\ - VACCTIMESI(Chimu_30,Chi_10,Z3)\ - VACCTIMESI(Chimu_31,Chi_11,Z4)\ - VACCTIMESI(Chimu_32,Chi_12,Z5)); + VSUBMEM(8,%r8,Chimu_12,Chi_12) ); #define ZM_PROJMEM(PTR) \ LOAD64(%r8,PTR) \ __asm__ ( \ SHUF_CHIMU23i \ - VACCTIMESMINUSI1MEM(Chimu_20,Chi_00,0,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_21,Chi_01,1,%r8) \ - VACCTIMESMINUSI1MEM(Chimu_22,Chi_02,2,%r8) \ - VACCTIMESI1MEM(Chimu_30,Chi_10,3,%r8) \ - VACCTIMESI1MEM(Chimu_31,Chi_11,4,%r8) \ - VACCTIMESI1MEM(Chimu_32,Chi_12,5,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_20,Chi_00,0,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_21,Chi_01,1,%r8) \ - VACCTIMESMINUSI2MEM(Chimu_22,Chi_02,2,%r8) \ - VACCTIMESI2MEM(Chimu_30,Chi_10,3,%r8) \ - VACCTIMESI2MEM(Chimu_31,Chi_11,4,%r8) \ - VACCTIMESI2MEM(Chimu_32,Chi_12,5,%r8) \ - EVICT_SPINOR(%r8) ); - - -#define TM_PROJ __asm__ ( \ - VSUB(Chimu_20,Chimu_00,Chi_00)\ - VSUB(Chimu_21,Chimu_01,Chi_01)\ - VSUB(Chimu_22,Chimu_02,Chi_02)\ - VSUB(Chimu_30,Chimu_10,Chi_10)\ - VSUB(Chimu_31,Chimu_11,Chi_11)\ - VSUB(Chimu_32,Chimu_12,Chi_12) ); - + LOAD_CHIi \ + VACCTIMESMINUSI1(Chi_00,Chi_00,Chimu_20)\ + VACCTIMESMINUSI1(Chi_01,Chi_01,Chimu_21)\ + VACCTIMESMINUSI1(Chi_02,Chi_02,Chimu_22)\ + VACCTIMESI1(Chi_10,Chi_10,Chimu_30)\ + VACCTIMESI1(Chi_11,Chi_11,Chimu_31)\ + VACCTIMESI1(Chi_12,Chi_12,Chimu_32)\ + VACCTIMESMINUSI2(Chi_00,Chi_00,Chimu_20)\ + VACCTIMESMINUSI2(Chi_01,Chi_01,Chimu_21)\ + VACCTIMESMINUSI2(Chi_02,Chi_02,Chimu_22)\ + VACCTIMESI2(Chi_10,Chi_10,Chimu_30)\ + VACCTIMESI2(Chi_11,Chi_11,Chimu_31)\ + VACCTIMESI2(Chi_12,Chi_12,Chimu_32) ); #define TM_PROJMEM(ptr) \ LOAD64(%r8,ptr) \ @@ -923,8 +780,7 @@ Author: paboyle VSUBMEM(8,%r8,Chimu_02,Chi_02) \ VSUBMEM(9,%r8,Chimu_10,Chi_10) \ VSUBMEM(10,%r8,Chimu_11,Chi_11) \ - VSUBMEM(11,%r8,Chimu_12,Chi_12) \ - EVICT_SPINOR(%r8) ); + VSUBMEM(11,%r8,Chimu_12,Chi_12) ); // fspin(0)=hspin(0) // fspin(1)=hspin(1) diff --git 
a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 0a95960e..5d014137 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -246,26 +246,30 @@ namespace Optimization { struct TimesMinusI{ //Complex single inline __m512 operator()(__m512 in, __m512 ret){ - __m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag - return _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(1,0,3,2)); // 0x4E?? + //__m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag + //return _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,1,0)); // 0x4E?? + __m512 tmp = _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); + return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); } //Complex double inline __m512d operator()(__m512d in, __m512d ret){ - __m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag - return _mm512_shuffle_pd(tmp,tmp,0x55); + //__m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag + //return _mm512_shuffle_pd(tmp,tmp,0x55); + __m512d tmp = _mm512_shuffle_pd(in,in,0x55); + return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp); } }; struct TimesI{ //Complex single inline __m512 operator()(__m512 in, __m512 ret){ - __m512 tmp = _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(1,0,3,2)); - return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp); + __m512 tmp = _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1)); + return _mm512_mask_sub_ps(tmp,0x5555,_mm512_setzero_ps(),tmp); } //Complex double inline __m512d operator()(__m512d in, __m512d ret){ - __m512d tmp = _mm512_shuffle_pd(tmp,tmp,0x55); - return _mm512_mask_sub_pd(tmp,0xaa,_mm512_setzero_pd(),tmp); + __m512d tmp = _mm512_shuffle_pd(in,in,0x55); + return _mm512_mask_sub_pd(tmp,0x55,_mm512_setzero_pd(),tmp); } From 1e355a51e1c81a2ac8c0191f04d7bb39a1bb6570 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 27 Mar 2016 23:46:55 -0700 Subject: [PATCH 7/7] Interface change --- 
lib/qcd/action/fermion/WilsonKernels.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 0690a99f..b94284f7 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -522,7 +522,7 @@ void WilsonKernels::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal) + int sF,int sU,const FermionField &in, FermionField &out) { DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 }