From d8b05e001db5d71387c828d5d6dad25630e11d76 Mon Sep 17 00:00:00 2001 From: neo Date: Wed, 27 May 2015 18:30:11 +0900 Subject: [PATCH 01/22] Check at configure time if CPU supports the requested SIMD optimization --- .gitignore | 31 + README.md | 4 +- configure | 1872 ++++++++++++++++- configure.ac | 32 +- lib/Grid_config.h | 33 + lib/Grid_config.h.in | 33 + m4/ax_check_compile_flag.m4 | 72 + m4/ax_ext.m4 | 288 +++ m4/ax_gcc_x86_avx_xgetbv.m4 | 79 + m4/ax_gcc_x86_cpuid.m4 | 45 + .../reconfigure_script | 0 11 files changed, 2475 insertions(+), 14 deletions(-) create mode 100644 m4/ax_check_compile_flag.m4 create mode 100644 m4/ax_ext.m4 create mode 100644 m4/ax_gcc_x86_avx_xgetbv.m4 create mode 100644 m4/ax_gcc_x86_cpuid.m4 rename reconfigure_script => scripts/reconfigure_script (100%) diff --git a/.gitignore b/.gitignore index 09504991..82e09bc0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.obj *~ errs +*# # Precompiled Headers *.gch @@ -48,3 +49,33 @@ config.status /stamp-h1 /config.sub /config.guess + + +# Packages # +############ +# it's better to unpack these files and commit the raw source +# git has its own built in compression methods +*.7z +*.dmg +*.gz +*.iso +*.jar +*.rar +*.tar +*.zip + +# Logs and databases # +###################### +*.log +*.sql +*.sqlite + +# OS generated files # +###################### +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db \ No newline at end of file diff --git a/README.md b/README.md index e18ca474..fe1453f7 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi for most programmers. The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported. +Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported. 
These are presented as @@ -46,3 +46,5 @@ are examples: ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none +For developers: +Use reconfigure_script in the scripts/ directory to create the autotools environment diff --git a/configure b/configure index ccfea6b7..ea0a71c2 100755 --- a/configure +++ b/configure @@ -640,6 +640,13 @@ BUILD_COMMS_MPI_TRUE EGREP GREP CXXCPP +SIMD_FLAGS +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +ac_ct_CC +CFLAGS +CC HAVE_CXX11 RANLIB OPENMP_CXXFLAGS @@ -759,6 +766,8 @@ LDFLAGS LIBS CPPFLAGS CCC +CC +CFLAGS CXXCPP' @@ -1386,7 +1395,7 @@ Optional Features: --disable-dependency-tracking speeds up one-time build --disable-openmp do not use OpenMP - --enable-simd=SSE|AVX|AVX2|AVX512|MIC + --enable-simd=SSE4|AVX|AVX2|AVX512|MIC Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC --enable-comms=none|mpi Select communications @@ -1404,6 +1413,8 @@ Some influential environment variables: LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory + CC C compiler command + CFLAGS C compiler flags CXXCPP C++ preprocessor Use these variables to override the choices made by `configure' or to help @@ -1570,6 +1581,86 @@ fi } # ac_fn_cxx_try_link +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + # ac_fn_cxx_try_cpp LINENO # ------------------------ # Try to preprocess conftest.$ac_ext, and return whether this succeeded. @@ -4166,6 +4257,1749 @@ $as_echo "#define HAVE_CXX11 1" >>confdefs.h fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... 
" >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. 
Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. 
Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86 cpuid output" >&5 +$as_echo_n "checking for x86 cpuid output... " >&6; } +if ${ax_cv_gcc_x86_cpuid_+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ax_cv_gcc_x86_cpuid_=unknown +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + int op = , eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_x86_cpuid_=`cat conftest_cpuid`; rm -f conftest_cpuid +else + ax_cv_gcc_x86_cpuid_=unknown; rm -f conftest_cpuid +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_cpuid_" >&5 +$as_echo "$ax_cv_gcc_x86_cpuid_" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86-AVX xgetbv output" >&5 +$as_echo_n "checking for x86-AVX xgetbv output... " >&6; } +if ${ax_cv_gcc_x86_avx_xgetbv_+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ax_cv_gcc_x86_avx_xgetbv_=unknown +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + int op = , eax, edx; + FILE *f; + /* Opcodes for xgetbv */ + __asm__(".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx), + : "c" (op)); + f = fopen("conftest_xgetbv", "w"); if (!f) return 1; + fprintf(f, "%x:%x\n", eax, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_x86_avx_xgetbv_=`cat conftest_xgetbv`; rm -f conftest_xgetbv +else + ax_cv_gcc_x86_avx_xgetbv_=unknown; rm -f conftest_xgetbv +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_avx_xgetbv_" >&5 +$as_echo "$ax_cv_gcc_x86_avx_xgetbv_" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + + + case $host_cpu in + powerpc*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether altivec is supported" >&5 +$as_echo_n "checking whether altivec is supported... " >&6; } +if ${ax_cv_have_altivec_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then + if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then + ax_cv_have_altivec_ext=yes + fi + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_altivec_ext" >&5 +$as_echo "$ax_cv_have_altivec_ext" >&6; } + + if test "$ax_cv_have_altivec_ext" = yes; then + +$as_echo "#define HAVE_ALTIVEC /**/" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -faltivec" >&5 +$as_echo_n "checking whether C++ compiler accepts -faltivec... 
" >&6; } +if ${ax_cv_check_cxxflags___faltivec+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -faltivec" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___faltivec=yes +else + ax_cv_check_cxxflags___faltivec=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___faltivec" >&5 +$as_echo "$ax_cv_check_cxxflags___faltivec" >&6; } +if test x"$ax_cv_check_cxxflags___faltivec" = xyes; then : + SIMD_FLAGS="$SIMD_FLAGS -faltivec" +else + : +fi + + fi + ;; + + + i[3456]86*|x86_64*|amd64*) + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86 cpuid 0x00000001 output" >&5 +$as_echo_n "checking for x86 cpuid 0x00000001 output... " >&6; } +if ${ax_cv_gcc_x86_cpuid_0x00000001+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ax_cv_gcc_x86_cpuid_0x00000001=unknown +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + int op = 0x00000001, eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_x86_cpuid_0x00000001=`cat conftest_cpuid`; rm -f conftest_cpuid +else + ax_cv_gcc_x86_cpuid_0x00000001=unknown; rm -f conftest_cpuid +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_cpuid_0x00000001" >&5 +$as_echo "$ax_cv_gcc_x86_cpuid_0x00000001" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + ecx=0 + edx=0 + ebx=0 + if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown"; + then + ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3` + edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4` + fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86 cpuid 0x00000007 output" >&5 +$as_echo_n "checking for x86 cpuid 0x00000007 output... " >&6; } +if ${ax_cv_gcc_x86_cpuid_0x00000007+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ax_cv_gcc_x86_cpuid_0x00000007=unknown +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ + + int op = 0x00000007, eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_x86_cpuid_0x00000007=`cat conftest_cpuid`; rm -f conftest_cpuid +else + ax_cv_gcc_x86_cpuid_0x00000007=unknown; rm -f conftest_cpuid +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_cpuid_0x00000007" >&5 +$as_echo "$ax_cv_gcc_x86_cpuid_0x00000007" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown"; + then + ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2` + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mmx is supported" >&5 +$as_echo_n "checking whether mmx is supported... " >&6; } +if ${ax_cv_have_mmx_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_mmx_ext=no + if test "$((0x$edx>>23&0x01))" = 1; then + ax_cv_have_mmx_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_mmx_ext" >&5 +$as_echo "$ax_cv_have_mmx_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sse is supported" >&5 +$as_echo_n "checking whether sse is supported... 
" >&6; } +if ${ax_cv_have_sse_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse_ext=no + if test "$((0x$edx>>25&0x01))" = 1; then + ax_cv_have_sse_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_sse_ext" >&5 +$as_echo "$ax_cv_have_sse_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sse2 is supported" >&5 +$as_echo_n "checking whether sse2 is supported... " >&6; } +if ${ax_cv_have_sse2_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse2_ext=no + if test "$((0x$edx>>26&0x01))" = 1; then + ax_cv_have_sse2_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_sse2_ext" >&5 +$as_echo "$ax_cv_have_sse2_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sse3 is supported" >&5 +$as_echo_n "checking whether sse3 is supported... " >&6; } +if ${ax_cv_have_sse3_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse3_ext=no + if test "$((0x$ecx&0x01))" = 1; then + ax_cv_have_sse3_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_sse3_ext" >&5 +$as_echo "$ax_cv_have_sse3_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ssse3 is supported" >&5 +$as_echo_n "checking whether ssse3 is supported... " >&6; } +if ${ax_cv_have_ssse3_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_ssse3_ext=no + if test "$((0x$ecx>>9&0x01))" = 1; then + ax_cv_have_ssse3_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_ssse3_ext" >&5 +$as_echo "$ax_cv_have_ssse3_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sse4.1 is supported" >&5 +$as_echo_n "checking whether sse4.1 is supported... 
" >&6; } +if ${ax_cv_have_sse41_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse41_ext=no + if test "$((0x$ecx>>19&0x01))" = 1; then + ax_cv_have_sse41_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_sse41_ext" >&5 +$as_echo "$ax_cv_have_sse41_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sse4.2 is supported" >&5 +$as_echo_n "checking whether sse4.2 is supported... " >&6; } +if ${ax_cv_have_sse42_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_sse42_ext=no + if test "$((0x$ecx>>20&0x01))" = 1; then + ax_cv_have_sse42_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_sse42_ext" >&5 +$as_echo "$ax_cv_have_sse42_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether avx is supported by processor" >&5 +$as_echo_n "checking whether avx is supported by processor... " >&6; } +if ${ax_cv_have_avx_cpu_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_avx_cpu_ext=no + if test "$((0x$ecx>>28&0x01))" = 1; then + ax_cv_have_avx_cpu_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_avx_cpu_ext" >&5 +$as_echo "$ax_cv_have_avx_cpu_ext" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether avx2 is supported by processor" >&5 +$as_echo_n "checking whether avx2 is supported by processor... " >&6; } +if ${ax_cv_have_avx2_cpu_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_avx2_cpu_ext=no + if test "$((0x$ebx>>5&0x01))" = 1; then + ax_cv_have_avx2_cpu_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_avx2_cpu_ext" >&5 +$as_echo "$ax_cv_have_avx2_cpu_ext" >&6; } + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether fma is supported by processor" >&5 +$as_echo_n "checking whether fma is supported by processor... 
" >&6; } +if ${ax_cv_have_fma_cpu_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_fma_cpu_ext=no + if test "$((0x$ecx>>12&0x01))" = 1; then + ax_cv_have_fma_cpu_ext=yes + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_fma_cpu_ext" >&5 +$as_echo "$ax_cv_have_fma_cpu_ext" >&6; } + + + if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for x86-AVX xgetbv 0x00000000 output" >&5 +$as_echo_n "checking for x86-AVX xgetbv 0x00000000 output... " >&6; } +if ${ax_cv_gcc_x86_avx_xgetbv_0x00000000+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ax_cv_gcc_x86_avx_xgetbv_0x00000000=unknown +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <stdio.h> +int +main () +{ + + int op = 0x00000000, eax, edx; + FILE *f; + /* Opcodes for xgetbv: ECX (op) is the input, result is returned in EAX and EDX */ + __asm__(".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (op)); + f = fopen("conftest_xgetbv", "w"); if (!f) return 1; + fprintf(f, "%x:%x\n", eax, edx); + fclose(f); + return 0; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ax_cv_gcc_x86_avx_xgetbv_0x00000000=`cat conftest_xgetbv`; rm -f conftest_xgetbv +else + ax_cv_gcc_x86_avx_xgetbv_0x00000000=unknown; rm -f conftest_xgetbv +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_gcc_x86_avx_xgetbv_0x00000000" >&5 +$as_echo "$ax_cv_gcc_x86_avx_xgetbv_0x00000000" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + xgetbv_eax="0" + if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then + xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1` + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether avx is supported by operating system" >&5 +$as_echo_n "checking whether avx is supported by operating system... 
" >&6; } +if ${ax_cv_have_avx_ext+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_cv_have_avx_ext=no + + if test "$((0x$ecx>>27&0x01))" = 1; then + if test "$((0x$xgetbv_eax&0x6))" = 6; then + ax_cv_have_avx_ext=yes + fi + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_have_avx_ext" >&5 +$as_echo "$ax_cv_have_avx_ext" >&6; } + if test x"$ax_cv_have_avx_ext" = x"no"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports AVX, but your operating system doesn't" >&5 +$as_echo "$as_me: WARNING: Your processor supports AVX, but your operating system doesn't" >&2;} + fi + fi + + if test "$ax_cv_have_mmx_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -mmmx" >&5 +$as_echo_n "checking whether C++ compiler accepts -mmmx... " >&6; } +if ${ax_cv_check_cxxflags___mmmx+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mmmx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___mmmx=yes +else + ax_cv_check_cxxflags___mmmx=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___mmmx" >&5 +$as_echo "$ax_cv_check_cxxflags___mmmx" >&6; } +if test x"$ax_cv_check_cxxflags___mmmx" = xyes; then : + ax_cv_support_mmx_ext=yes +else + : +fi + + if test x"$ax_cv_support_mmx_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mmmx" + +$as_echo "#define HAVE_MMX /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports mmx instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports mmx instructions but not your compiler, can you try another compiler?" 
>&2;} + fi + fi + + if test "$ax_cv_have_sse_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -msse" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse... " >&6; } +if ${ax_cv_check_cxxflags___msse+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___msse=yes +else + ax_cv_check_cxxflags___msse=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___msse" >&5 +$as_echo "$ax_cv_check_cxxflags___msse" >&6; } +if test x"$ax_cv_check_cxxflags___msse" = xyes; then : + ax_cv_support_sse_ext=yes +else + : +fi + + if test x"$ax_cv_support_sse_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse" + +$as_echo "#define HAVE_SSE /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports sse instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse2_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -msse2" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse2... " >&6; } +if ${ax_cv_check_cxxflags___msse2+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___msse2=yes +else + ax_cv_check_cxxflags___msse2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___msse2" >&5 +$as_echo "$ax_cv_check_cxxflags___msse2" >&6; } +if test x"$ax_cv_check_cxxflags___msse2" = xyes; then : + ax_cv_support_sse2_ext=yes +else + : +fi + + if test x"$ax_cv_support_sse2_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse2" + +$as_echo "#define HAVE_SSE2 /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports sse2 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse2 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse3_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -msse3" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse3... " >&6; } +if ${ax_cv_check_cxxflags___msse3+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___msse3=yes +else + ax_cv_check_cxxflags___msse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___msse3" >&5 +$as_echo "$ax_cv_check_cxxflags___msse3" >&6; } +if test x"$ax_cv_check_cxxflags___msse3" = xyes; then : + ax_cv_support_sse3_ext=yes +else + : +fi + + if test x"$ax_cv_support_sse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse3" + +$as_echo "#define HAVE_SSE3 /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports sse3 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse3 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_ssse3_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -mssse3" >&5 +$as_echo_n "checking whether C++ compiler accepts -mssse3... " >&6; } +if ${ax_cv_check_cxxflags___mssse3+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mssse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___mssse3=yes +else + ax_cv_check_cxxflags___mssse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___mssse3" >&5 +$as_echo "$ax_cv_check_cxxflags___mssse3" >&6; } +if test x"$ax_cv_check_cxxflags___mssse3" = xyes; then : + ax_cv_support_ssse3_ext=yes +else + : +fi + + if test x"$ax_cv_support_ssse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mssse3" + +$as_echo "#define HAVE_SSSE3 /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports ssse3 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports ssse3 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse41_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -msse4.1" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse4.1... " >&6; } +if ${ax_cv_check_cxxflags___msse4_1+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse4.1" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___msse4_1=yes +else + ax_cv_check_cxxflags___msse4_1=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___msse4_1" >&5 +$as_echo "$ax_cv_check_cxxflags___msse4_1" >&6; } +if test x"$ax_cv_check_cxxflags___msse4_1" = xyes; then : + ax_cv_support_sse41_ext=yes +else + : +fi + + if test x"$ax_cv_support_sse41_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse4.1" + +$as_echo "#define HAVE_SSE4_1 /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_sse42_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -msse4.2" >&5 +$as_echo_n "checking whether C++ compiler accepts -msse4.2... " >&6; } +if ${ax_cv_check_cxxflags___msse4_2+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___msse4_2=yes +else + ax_cv_check_cxxflags___msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___msse4_2" >&5 +$as_echo "$ax_cv_check_cxxflags___msse4_2" >&6; } +if test x"$ax_cv_check_cxxflags___msse4_2" = xyes; then : + ax_cv_support_sse42_ext=yes +else + : +fi + + if test x"$ax_cv_support_sse42_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse4.2" + +$as_echo "#define HAVE_SSE4_2 /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_avx_ext" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -mavx" >&5 +$as_echo_n "checking whether C++ compiler accepts -mavx... " >&6; } +if ${ax_cv_check_cxxflags___mavx+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___mavx=yes +else + ax_cv_check_cxxflags___mavx=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___mavx" >&5 +$as_echo "$ax_cv_check_cxxflags___mavx" >&6; } +if test x"$ax_cv_check_cxxflags___mavx" = xyes; then : + ax_cv_support_avx_ext=yes +else + : +fi + + if test x"$ax_cv_support_avx_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mavx" + +$as_echo "#define HAVE_AVX /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports avx instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports avx instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_avx2_cpu_ext" = yes && test "$ax_cv_have_avx_ext" = yes; then # CPU reports AVX2 and the OS-level AVX check passed + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -mavx2" >&5 +$as_echo_n "checking whether C++ compiler accepts -mavx2... " >&6; } +if ${ax_cv_check_cxxflags___mavx2+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___mavx2=yes +else + ax_cv_check_cxxflags___mavx2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___mavx2" >&5 +$as_echo "$ax_cv_check_cxxflags___mavx2" >&6; } +if test x"$ax_cv_check_cxxflags___mavx2" = xyes; then : + ax_cv_support_avx2_ext=yes +else + : +fi + + if test x"$ax_cv_support_avx2_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mavx2" + +$as_echo "#define HAVE_AVX2 /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports avx2 instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports avx2 instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + if test "$ax_cv_have_fma_cpu_ext" = yes && test "$ax_cv_have_avx_ext" = yes; then # CPU reports FMA and the OS-level AVX check passed + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C++ compiler accepts -mfma" >&5 +$as_echo_n "checking whether C++ compiler accepts -mfma... " >&6; } +if ${ax_cv_check_cxxflags___mfma+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CXXFLAGS + CXXFLAGS="$CXXFLAGS -mfma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ax_cv_check_cxxflags___mfma=yes +else + ax_cv_check_cxxflags___mfma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS=$ax_check_save_flags +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cxxflags___mfma" >&5 +$as_echo "$ax_cv_check_cxxflags___mfma" >&6; } +if test x"$ax_cv_check_cxxflags___mfma" = xyes; then : + ax_cv_support_fma_ext=yes +else + : +fi + + if test x"$ax_cv_support_fma_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mfma" + +$as_echo "#define HAVE_FMA /**/" >>confdefs.h + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor supports fma instructions but not your compiler, can you try another compiler?" >&5 +$as_echo "$as_me: WARNING: Your processor supports fma instructions but not your compiler, can you try another compiler?" >&2;} + fi + fi + + ;; + esac + + # Checks for libraries. @@ -4834,30 +6668,48 @@ else fi +supported=no + case ${ac_SIMD} in SSE4) echo Configuring for SSE4 - + if test x"$ax_cv_support_ssse3_ext" = x"yes"; then $as_echo "#define SSE4 1" >>confdefs.h + supported=yes + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support SSE4 instructions" >&5 +$as_echo "$as_me: WARNING: Your processor does not support SSE4 instructions" >&2;} + fi ;; AVX) echo Configuring for AVX - + if test x"$ax_cv_support_avx_ext" = x"yes"; then $as_echo "#define AVX1 1" >>confdefs.h + supported=yes + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support AVX instructions" >&5 +$as_echo "$as_me: WARNING: Your processor does not support AVX instructions" >&2;} + fi ;; AVX2) echo Configuring for AVX2 - + if test x"$ax_cv_support_avx2_ext" = x"yes"; then $as_echo "#define AVX2 1" >>confdefs.h + supported=yes + else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your processor does not support AVX2 instructions" >&5 +$as_echo 
"$as_me: WARNING: Your processor does not support AVX2 instructions" >&2;} + fi ;; AVX512|MIC) echo Configuring for AVX512 and MIC $as_echo "#define AVX512 1" >>confdefs.h + supported=yes ;; *) as_fn_error $? "${ac_SIMD} unsupported --enable-simd option" "$LINENO" 5; @@ -5067,7 +6919,9 @@ ac_config_files="$ac_config_files docs/doxy.cfg" fi - +echo +echo Creating configuration files +echo ::::::::::::::::::::::::::::::::::::::::::: ac_config_files="$ac_config_files Makefile" ac_config_files="$ac_config_files lib/Makefile" @@ -5209,6 +7063,10 @@ if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${BUILD_COMMS_MPI_TRUE}" && test -z "${BUILD_COMMS_MPI_FALSE}"; then as_fn_error $? "conditional \"BUILD_COMMS_MPI\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 @@ -6561,9 +8419,9 @@ The following features are enabled: - os (target) : $target_os - build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` - graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` - +- Supported SIMD flags : $SIMD_FLAGS ---------------------------------------------------------- -- enabled simd support : ${ac_SIMD} +- enabled simd support : ${ac_SIMD} (supported: $supported ) - communications type : ${ac_COMMS} diff --git a/configure.ac b/configure.ac index 5dcbea36..00622bb8 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Project Grid package # -# Time-stamp: <2015-05-26 17:18:54 neo> +# Time-stamp: <2015-05-27 18:29:04 neo> AC_PREREQ([2.63]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) @@ -27,7 +27,7 @@ AC_PROG_CXX AC_OPENMP AC_PROG_RANLIB AX_CXX_COMPILE_STDCXX_11(noext, mandatory) - +AX_EXT # Checks for libraries. #AX_GCC_VAR_ATTRIBUTE(aligned) @@ -69,26 +69,44 @@ Info at: http://www.mpfr.org/)]) -AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE|AVX|AVX2|AVX512|MIC],\ +AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\ [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\ [ac_SIMD=${enable_simd}],[ac_SIMD=AVX2]) +supported=no + case ${ac_SIMD} in SSE4) echo Configuring for SSE4 + if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4 AC_DEFINE([SSE4],[1],[SSE4] ) + supported=yes + else + AC_MSG_WARN([Your processor does not support SSE4 instructions]) + fi ;; AVX) echo Configuring for AVX + if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX AC_DEFINE([AVX1],[1],[AVX] ) + supported=yes + else + AC_MSG_WARN([Your processor does not support AVX instructions]) + fi ;; AVX2) echo Configuring for AVX2 + if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2 AC_DEFINE([AVX2],[1],[AVX2] ) + supported=yes + else + 
AC_MSG_WARN([Your processor does not support AVX2 instructions]) + fi ;; AVX512|MIC) echo Configuring for AVX512 and MIC AC_DEFINE([AVX512],[1],[AVX512] ) + supported=yes ;; *) AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]); @@ -129,7 +147,9 @@ then AC_CONFIG_FILES([docs/doxy.cfg]) fi - +echo +echo Creating configuration files +echo ::::::::::::::::::::::::::::::::::::::::::: AC_CONFIG_FILES(Makefile) AC_CONFIG_FILES(lib/Makefile) AC_CONFIG_FILES(tests/Makefile) @@ -150,9 +170,9 @@ The following features are enabled: - os (target) : $target_os - build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` - graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` - +- Supported SIMD flags : $SIMD_FLAGS ---------------------------------------------------------- -- enabled simd support : ${ac_SIMD} +- enabled simd support : ${ac_SIMD} (supported: $supported ) - communications type : ${ac_COMMS} diff --git a/lib/Grid_config.h b/lib/Grid_config.h index 2397894f..914bd06c 100644 --- a/lib/Grid_config.h +++ b/lib/Grid_config.h @@ -16,6 +16,15 @@ /* GRID_COMMS_NONE */ #define GRID_COMMS_NONE 1 +/* Support Altivec instructions */ +/* #undef HAVE_ALTIVEC */ + +/* Support AVX (Advanced Vector Extensions) instructions */ +/* #undef HAVE_AVX */ + +/* Support AVX2 (Advanced Vector Extensions 2) instructions */ +/* #undef HAVE_AVX2 */ + /* define if the compiler supports basic C++11 syntax */ /* #undef HAVE_CXX11 */ @@ -30,6 +39,9 @@ /* Define to 1 if you have the header file. */ #define HAVE_ENDIAN_H 1 +/* Support FMA3 (Fused Multiply-Add) instructions */ +/* #undef HAVE_FMA */ + /* Define to 1 if you have the `gettimeofday' function. */ #define HAVE_GETTIMEOFDAY 1 @@ -54,9 +66,30 @@ /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 +/* Support mmx instructions */ +#define HAVE_MMX /**/ + /* Define to 1 if you have the header file. 
*/ #define HAVE_MM_MALLOC_H 1 +/* Support SSE (Streaming SIMD Extensions) instructions */ +#define HAVE_SSE /**/ + +/* Support SSE2 (Streaming SIMD Extensions 2) instructions */ +#define HAVE_SSE2 /**/ + +/* Support SSE3 (Streaming SIMD Extensions 3) instructions */ +#define HAVE_SSE3 /**/ + +/* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */ +#define HAVE_SSE4_1 /**/ + +/* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */ +#define HAVE_SSE4_2 /**/ + +/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */ +#define HAVE_SSSE3 /**/ + /* Define to 1 if you have the <stdint.h> header file. */ #define HAVE_STDINT_H 1 diff --git a/lib/Grid_config.h.in b/lib/Grid_config.h.in index 6f05d6cb..8c958c82 100644 --- a/lib/Grid_config.h.in +++ b/lib/Grid_config.h.in @@ -15,6 +15,15 @@ /* GRID_COMMS_NONE */ #undef GRID_COMMS_NONE +/* Support Altivec instructions */ +#undef HAVE_ALTIVEC + +/* Support AVX (Advanced Vector Extensions) instructions */ +#undef HAVE_AVX + +/* Support AVX2 (Advanced Vector Extensions 2) instructions */ +#undef HAVE_AVX2 + /* define if the compiler supports basic C++11 syntax */ #undef HAVE_CXX11 @@ -29,6 +38,9 @@ /* Define to 1 if you have the <endian.h> header file. */ #undef HAVE_ENDIAN_H +/* Support FMA3 (Fused Multiply-Add) instructions */ +#undef HAVE_FMA + /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -53,9 +65,30 @@ /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H +/* Support mmx instructions */ +#undef HAVE_MMX + /* Define to 1 if you have the <mm_malloc.h> header file. 
*/ #undef HAVE_MM_MALLOC_H +/* Support SSE (Streaming SIMD Extensions) instructions */ +#undef HAVE_SSE + +/* Support SSE2 (Streaming SIMD Extensions 2) instructions */ +#undef HAVE_SSE2 + +/* Support SSE3 (Streaming SIMD Extensions 3) instructions */ +#undef HAVE_SSE3 + +/* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */ +#undef HAVE_SSE4_1 + +/* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */ +#undef HAVE_SSE4_2 + +/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */ +#undef HAVE_SSSE3 + /* Define to 1 if you have the <stdint.h> header file. */ #undef HAVE_STDINT_H diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 new file mode 100644 index 00000000..c3a8d695 --- /dev/null +++ b/m4/ax_check_compile_flag.m4 @@ -0,0 +1,72 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) +# +# DESCRIPTION +# +# Check whether the given FLAG works with the current language's compiler +# or gives an error. (Warnings, however, are ignored) +# +# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on +# success/failure. +# +# If EXTRA-FLAGS is defined, it is added to the current language's default +# flags (e.g. CFLAGS) when the check is done. The check is thus made with +# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to +# force the compiler to issue an error when a bad flag is given. +# +# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this +# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. +# +# LICENSE +# +# Copyright (c) 2008 Guido U. 
Draheim +# Copyright (c) 2011 Maarten Bosmans +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. 
+ +#serial 2 + +AC_DEFUN([AX_CHECK_COMPILE_FLAG], +[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX +AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl +AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ + ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" + AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], + [AS_VAR_SET(CACHEVAR,[yes])], + [AS_VAR_SET(CACHEVAR,[no])]) + _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) +AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], + [m4_default([$2], :)], + [m4_default([$3], :)]) +AS_VAR_POPDEF([CACHEVAR])dnl +])dnl AX_CHECK_COMPILE_FLAGS diff --git a/m4/ax_ext.m4 b/m4/ax_ext.m4 new file mode 100644 index 00000000..97a0f3e6 --- /dev/null +++ b/m4/ax_ext.m4 @@ -0,0 +1,288 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_ext.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_EXT +# +# DESCRIPTION +# +# Find supported SIMD extensions by requesting cpuid. When an SIMD +# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if +# compiler supports it. For example, if "sse2" is available, then "-msse2" +# is added to SIMD_FLAGS. +# +# This macro calls: +# +# AC_SUBST(SIMD_FLAGS) +# +# And defines: +# +# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX +# +# LICENSE +# +# Copyright (c) 2007 Christophe Tournayre +# Copyright (c) 2013 Michael Petch +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 13 + +AC_DEFUN([AX_EXT], +[ + AC_REQUIRE([AC_CANONICAL_HOST]) + + case $host_cpu in + powerpc*) + AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext], + [ + if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then + if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then + ax_cv_have_altivec_ext=yes + fi + fi + ]) + + if test "$ax_cv_have_altivec_ext" = yes; then + AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions]) + AX_CHECK_COMPILE_FLAG(-faltivec, [SIMD_FLAGS="$SIMD_FLAGS -faltivec"], []) + fi + ;; + + + i[[3456]]86*|x86_64*|amd64*) + + AC_REQUIRE([AX_GCC_X86_CPUID]) + AC_REQUIRE([AX_GCC_X86_AVX_XGETBV]) + + AX_GCC_X86_CPUID(0x00000001) + ecx=0 + edx=0 + ebx=0 + if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown"; + then + ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3` + edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4` + fi + + AX_GCC_X86_CPUID(0x00000007) + if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown"; + then + ebx=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2` + fi + + AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext], + [ + ax_cv_have_mmx_ext=no + if test "$((0x$edx>>23&0x01))" = 1; then + ax_cv_have_mmx_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext], + [ + ax_cv_have_sse_ext=no + if test "$((0x$edx>>25&0x01))" = 1; then + ax_cv_have_sse_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext], + [ + ax_cv_have_sse2_ext=no + if test "$((0x$edx>>26&0x01))" = 1; then + ax_cv_have_sse2_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext], + [ + ax_cv_have_sse3_ext=no + if test "$((0x$ecx&0x01))" = 1; then + ax_cv_have_sse3_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext], + [ + ax_cv_have_ssse3_ext=no + if test "$((0x$ecx>>9&0x01))" = 1; then + ax_cv_have_ssse3_ext=yes + fi + ]) + + 
AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext], + [ + ax_cv_have_sse41_ext=no + if test "$((0x$ecx>>19&0x01))" = 1; then + ax_cv_have_sse41_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext], + [ + ax_cv_have_sse42_ext=no + if test "$((0x$ecx>>20&0x01))" = 1; then + ax_cv_have_sse42_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext], + [ + ax_cv_have_avx_cpu_ext=no + if test "$((0x$ecx>>28&0x01))" = 1; then + ax_cv_have_avx_cpu_ext=yes + fi + ]) + + AC_CACHE_CHECK([whether avx2 is supported by processor], [ax_cv_have_avx2_cpu_ext], + [ + ax_cv_have_avx2_cpu_ext=no + if test "$((0x$ebx>>5&0x01))" = 1; then + ax_cv_have_avx2_cpu_ext=yes + fi + ]) + + + AC_CACHE_CHECK([whether fma is supported by processor], [ax_cv_have_fma_cpu_ext], + [ + ax_cv_have_fma_cpu_ext=no + if test "$((0x$ecx>>12&0x01))" = 1; then + ax_cv_have_fma_cpu_ext=yes + fi + ]) + + + if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then + AX_GCC_X86_AVX_XGETBV(0x00000000) + + xgetbv_eax="0" + if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then + xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1` + fi + + AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext], + [ + ax_cv_have_avx_ext=no + + if test "$((0x$ecx>>27&0x01))" = 1; then + if test "$((0x$xgetbv_eax&0x6))" = 6; then + ax_cv_have_avx_ext=yes + fi + fi + ]) + if test x"$ax_cv_have_avx_ext" = x"no"; then + AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't]) + fi + fi + + if test "$ax_cv_have_mmx_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, []) + if test x"$ax_cv_support_mmx_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mmmx" + AC_DEFINE(HAVE_MMX,,[Support mmx instructions]) + else + AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test 
"$ax_cv_have_sse_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, []) + if test x"$ax_cv_support_sse_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse" + AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions]) + else + AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_sse2_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, []) + if test x"$ax_cv_support_sse2_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse2" + AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions]) + else + AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_sse3_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, []) + if test x"$ax_cv_support_sse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse3" + AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions]) + else + AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_ssse3_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, []) + if test x"$ax_cv_support_ssse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mssse3" + AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) + else + AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_sse41_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, []) + if test x"$ax_cv_support_sse41_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse4.1" + AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions]) + else + AC_MSG_WARN([Your processor supports sse4.1 instructions but not 
your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_sse42_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, []) + if test x"$ax_cv_support_sse42_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -msse4.2" + AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions]) + else + AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_avx_ext" = yes; then + AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, []) + if test x"$ax_cv_support_avx_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mavx" + AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions]) + else + AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_avx2_cpu_ext" = yes && test "$ax_cv_have_avx_ext" = yes; then # AVX2: CPU bit from the cpuid check above plus the OS-level AVX result + AX_CHECK_COMPILE_FLAG(-mavx2, ax_cv_support_avx2_ext=yes, []) + if test x"$ax_cv_support_avx2_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mavx2" + AC_DEFINE(HAVE_AVX2,,[Support AVX2 (Advanced Vector Extensions 2) instructions]) + else + AC_MSG_WARN([Your processor supports avx2 instructions but not your compiler, can you try another compiler?]) + fi + fi + + if test "$ax_cv_have_fma_cpu_ext" = yes && test "$ax_cv_have_avx_ext" = yes; then # FMA3: same gating as AVX2 - CPU bit plus the OS-level AVX result + AX_CHECK_COMPILE_FLAG(-mfma, ax_cv_support_fma_ext=yes, []) + if test x"$ax_cv_support_fma_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mfma" + AC_DEFINE(HAVE_FMA,,[Support FMA3 (Fused Multiply-Add) instructions]) + else + AC_MSG_WARN([Your processor supports fma instructions but not your compiler, can you try another compiler?]) + fi + fi + + ;; + esac + + AC_SUBST(SIMD_FLAGS) +]) diff --git a/m4/ax_gcc_x86_avx_xgetbv.m4 b/m4/ax_gcc_x86_avx_xgetbv.m4 new file mode 100644 index 00000000..2da9a6ab --- /dev/null +++ b/m4/ax_gcc_x86_avx_xgetbv.m4 @@ -0,0 +1,79 @@ +# =========================================================================== +# 
http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_GCC_X86_AVX_XGETBV(OP) +# +# DESCRIPTION +# +# On later x86 processors with AVX SIMD support, with gcc or a compiler +# that has a compatible syntax for inline assembly instructions, run a +# small program that executes the xgetbv instruction with input OP. This +# can be used to detect if the OS supports AVX instruction usage. +# +# On output, the values of the eax and edx registers are stored as +# hexadecimal strings as "eax:edx" in the cache variable +# ax_cv_gcc_x86_avx_xgetbv_OP. +# +# If the xgetbv instruction fails (because you are running a +# cross-compiler, or because you are not using gcc, or because you are on +# a processor that doesn't have this instruction), +# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown". +# +# This macro mainly exists to be used in AX_EXT. +# +# LICENSE +# +# Copyright (c) 2013 Michael Petch +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. 
You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 1 + +AC_DEFUN([AX_GCC_X86_AVX_XGETBV], +[AC_REQUIRE([AC_PROG_CC]) +AC_LANG_PUSH([C]) +AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1, + [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [ + int op = $1, eax, edx; + FILE *f; + /* Raw opcode bytes for xgetbv: ecx selects the register, result comes back in edx:eax */ + __asm__(".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (op)); + f = fopen("conftest_xgetbv", "w"); if (!f) return 1; + fprintf(f, "%x:%x\n", eax, edx); + fclose(f); + return 0; +])], + [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv], + [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv], + [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])]) +AC_LANG_POP([C]) +]) diff --git a/m4/ax_gcc_x86_cpuid.m4 b/m4/ax_gcc_x86_cpuid.m4 new file mode 100644 index 00000000..47dc8fda --- /dev/null +++ b/m4/ax_gcc_x86_cpuid.m4 @@ -0,0 +1,45 @@ +dnl @synopsis AX_GCC_X86_CPUID(OP) +dnl +dnl @summary run x86 cpuid instruction OP using gcc inline assembler +dnl +dnl On Pentium and later x86 processors, with gcc or a compiler that +dnl has a compatible syntax for inline assembly instructions, run a +dnl small program that executes the cpuid instruction with input OP. +dnl This can be used to detect the CPU type. +dnl +dnl On output, the values of the eax, ebx, ecx, and edx registers are +dnl stored as hexadecimal strings as "eax:ebx:ecx:edx" in the cache +dnl variable ax_cv_gcc_x86_cpuid_OP. 
+dnl +dnl If the cpuid instruction fails (because you are running a +dnl cross-compiler, or because you are not using gcc, or because you +dnl are on a processor that doesn't have this instruction), +dnl ax_cv_gcc_x86_cpuid_OP is set to the string "unknown". +dnl +dnl This macro mainly exists to be used in AX_GCC_ARCHFLAG. +dnl +dnl @category Misc +dnl @author Steven G. Johnson and Matteo Frigo. +dnl @version 2005-05-30 +dnl @license GPLWithACException + +AC_DEFUN([AX_GCC_X86_CPUID], +[AC_REQUIRE([AC_PROG_CC]) +AC_LANG_PUSH([C]) +AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1, + [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [ + int op = $1, eax, ebx, ecx, edx; + FILE *f; + __asm__("cpuid" /* ecx is zeroed on input so leaves that take a sub-leaf (e.g. 7) query sub-leaf 0 */ + : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) + : "a" (op), "c" (0)); + f = fopen("conftest_cpuid", "w"); if (!f) return 1; + fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); + fclose(f); + return 0; +])], + [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid], + [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid], + [ax_cv_gcc_x86_cpuid_$1=unknown])]) +AC_LANG_POP([C]) +]) diff --git a/reconfigure_script b/scripts/reconfigure_script similarity index 100% rename from reconfigure_script rename to scripts/reconfigure_script From 96ad352741f6bda01b7b4a2d8328c8a0c636c811 Mon Sep 17 00:00:00 2001 From: neo Date: Fri, 29 May 2015 11:41:02 +0900 Subject: [PATCH 02/22] Some modifications to the configure to check SIMD support --- configure | 2 +- configure.ac | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configure b/configure index ea0a71c2..2d785792 100755 --- a/configure +++ b/configure @@ -6709,7 +6709,7 @@ $as_echo "$as_me: WARNING: Your processor does not support AVX2 instructions" >& $as_echo "#define AVX512 1" >>confdefs.h - supported=yes + supported="cross compilation" ;; *) as_fn_error $? 
"${ac_SIMD} unsupported --enable-simd option" "$LINENO" 5; diff --git a/configure.ac b/configure.ac index 00622bb8..1edb3e33 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Project Grid package # -# Time-stamp: <2015-05-27 18:29:04 neo> +# Time-stamp: <2015-05-27 18:51:47 neo> AC_PREREQ([2.63]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) @@ -106,7 +106,7 @@ case ${ac_SIMD} in AVX512|MIC) echo Configuring for AVX512 and MIC AC_DEFINE([AVX512],[1],[AVX512] ) - supported=yes + supported="cross compilation" ;; *) AC_MSG_ERROR([${ac_SIMD} unsupported --enable-simd option]); From 0bc004de7cebb246252a987e2b6dabd204731ff8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 2 Jun 2015 16:57:12 +0100 Subject: [PATCH 03/22] Domain wall fermions now invert ; have the basis set up for Tanh/Zolo * (Cayley/PartFrac/ContFrac) * (Mobius/Shamir/Wilson) Approx Representation Kernel. All are done with space-time taking part in checkerboarding, Ls uncheckerboarded Have only so far tested the Domain Wall limit of mobius, and at that only checked that it i) Inverts ii) 5dim DW == Ls copies of 4dim D2 iii) MeeInv Mee == 1 iv) Meo+Mee+Moe+Moo == M unprec. v) MpcDagMpc is hermitan vi) Mdag is the adjoint of M between stochastic vectors. That said, the RB schur solve, RB MpcDagMpc solve, Unprec solve all converge and the true residual becomes small; so pretty good tests. 
--- benchmarks/Grid_dwf.cc | 79 +++--- benchmarks/Grid_dwf_cg_prec.cc | 58 +++++ benchmarks/Grid_dwf_cg_schur.cc | 53 ++++ benchmarks/Grid_dwf_cg_unprec.cc | 53 ++++ benchmarks/Grid_dwf_even_odd.cc | 207 +++++++++++++++ benchmarks/Grid_wilson_evenodd.cc | 5 +- benchmarks/Makefile.am | 46 +++- lib/Grid_init.cc | 2 +- lib/Makefile.am | 16 +- lib/algorithms/LinearOperator.h | 32 --- lib/algorithms/approx/Zolotarev.cc | 9 +- lib/algorithms/approx/Zolotarev.h | 9 +- lib/cartesian/Grid_cartesian_base.h | 2 +- lib/cartesian/Grid_cartesian_full.h | 6 +- lib/cartesian/Grid_cartesian_red_black.h | 24 +- lib/communicator/Grid_communicator_base.h | 2 +- lib/communicator/Grid_communicator_mpi.cc | 2 +- lib/communicator/Grid_communicator_none.cc | 2 +- lib/qcd/LinalgUtils.h | 113 +++++++++ lib/qcd/QCD.h | 2 + lib/qcd/SpaceTimeGrid.cc | 52 ++++ lib/qcd/SpaceTimeGrid.h | 18 ++ lib/qcd/action/Actions.h | 78 +++++- lib/qcd/action/fermion/CayleyFermion5D.cc | 235 ++++++++++++++++++ lib/qcd/action/fermion/CayleyFermion5D.h | 61 +++++ .../fermion/ContinuedFractionFermion5D.cc | 119 +++++++++ .../fermion/ContinuedFractionFermion5D.h | 53 ++++ lib/qcd/action/fermion/DomainWallFermion.h | 118 +++++++++ .../{FermionAction.h => FermionOperator.h} | 7 +- .../fermion/PartialFractionFermion5D.cc | 47 ++++ .../action/fermion/PartialFractionFermion5D.h | 49 ++++ lib/qcd/action/fermion/WilsonFermion.cc | 6 +- lib/qcd/action/fermion/WilsonFermion.h | 6 +- ...DimWilsonFermion.cc => WilsonFermion5D.cc} | 106 +++----- ...veDimWilsonFermion.h => WilsonFermion5D.h} | 31 +-- lib/stencil/Grid_stencil_common.cc | 12 +- 36 files changed, 1500 insertions(+), 220 deletions(-) create mode 100644 benchmarks/Grid_dwf_cg_prec.cc create mode 100644 benchmarks/Grid_dwf_cg_schur.cc create mode 100644 benchmarks/Grid_dwf_cg_unprec.cc create mode 100644 benchmarks/Grid_dwf_even_odd.cc create mode 100644 lib/qcd/LinalgUtils.h create mode 100644 lib/qcd/SpaceTimeGrid.cc create mode 100644 lib/qcd/SpaceTimeGrid.h create 
mode 100644 lib/qcd/action/fermion/CayleyFermion5D.cc create mode 100644 lib/qcd/action/fermion/CayleyFermion5D.h create mode 100644 lib/qcd/action/fermion/ContinuedFractionFermion5D.cc create mode 100644 lib/qcd/action/fermion/ContinuedFractionFermion5D.h create mode 100644 lib/qcd/action/fermion/DomainWallFermion.h rename lib/qcd/action/fermion/{FermionAction.h => FermionOperator.h} (92%) create mode 100644 lib/qcd/action/fermion/PartialFractionFermion5D.cc create mode 100644 lib/qcd/action/fermion/PartialFractionFermion5D.h rename lib/qcd/action/fermion/{FiveDimWilsonFermion.cc => WilsonFermion5D.cc} (68%) rename lib/qcd/action/fermion/{FiveDimWilsonFermion.h => WilsonFermion5D.h} (72%) diff --git a/benchmarks/Grid_dwf.cc b/benchmarks/Grid_dwf.cc index eb1d9299..62869b17 100644 --- a/benchmarks/Grid_dwf.cc +++ b/benchmarks/Grid_dwf.cc @@ -24,43 +24,28 @@ int main (int argc, char ** argv) std::cout << "Grid is setup to use "< latt4 = GridDefaultLatt(); - std::vector simd4 = GridDefaultSimd(Nd,vComplexF::Nsimd()); - std::vector mpi4 = GridDefaultMpi(); - - assert(latt4.size()==4 ); - assert(simd4.size()==4 ); - assert(mpi4.size() ==4 ); - - const int Ls=1; - std::vector latt5({Ls,latt4[0],latt4[1],latt4[2],latt4[3]}); - std::vector simd5({1 ,simd4[0],simd4[1],simd4[2],simd4[3]}); - std::vector mpi5({1 , mpi4[0], mpi4[1], mpi4[2], mpi4[3]}); - std::vector cb5({0,1,1,1,1}); // Checkerboard 4d only - int cbd=1; // use dim-1 to reduce - - // Four dim grid for gauge field U - GridCartesian UGrid(latt4,simd4,mpi4); - GridRedBlackCartesian UrbGrid(&UGrid); - - // Five dim grid for fermions F - GridCartesian FGrid(latt5,simd5,mpi5); - GridRedBlackCartesian FrbGrid(latt5,simd5,mpi5,cb5,cbd); + const int Ls=8; + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = 
SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); std::vector seeds4({1,2,3,4}); std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG5(&FGrid); RNG5.SeedFixedIntegers(seeds5); - LatticeFermion src (&FGrid); random(RNG5,src); - LatticeFermion result(&FGrid); result=zero; - LatticeFermion ref(&FGrid); ref=zero; - LatticeFermion tmp(&FGrid); - LatticeFermion err(&FGrid); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + + LatticeFermion src (FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); + LatticeFermion err(FGrid); ColourMatrix cm = Complex(1.0,0.0); - GridParallelRNG RNG4(&UGrid); RNG4.SeedFixedIntegers(seeds4); - LatticeGaugeField Umu(&UGrid); random(RNG4,Umu); - LatticeGaugeField Umu5d(&FGrid); + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + LatticeGaugeField Umu5d(FGrid); // replicate across fifth dimension for(int ss=0;ssoSites();ss++){ @@ -72,7 +57,7 @@ int main (int argc, char ** argv) //////////////////////////////////// // Naive wilson implementation //////////////////////////////////// - std::vector U(4,&FGrid); + std::vector U(4,FGrid); for(int mu=0;mu(Umu5d,mu); } @@ -93,17 +78,17 @@ int main (int argc, char ** argv) } RealD mass=0.1; - FiveDimWilsonFermion Dw(Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass); + RealD M5 =1.8; + DomainWallFermion Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); std::cout << "Calling Dw"< + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), 
GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + LatticeFermion src_o(FrbGrid); + LatticeFermion result_o(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o=zero; + + HermitianCheckerBoardedOperator HermOpEO(Ddwf); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOpEO,src_o,result_o); + + Grid_finalize(); +} diff --git a/benchmarks/Grid_dwf_cg_schur.cc b/benchmarks/Grid_dwf_cg_schur.cc new file mode 100644 index 00000000..aac4d3fd --- /dev/null +++ b/benchmarks/Grid_dwf_cg_schur.cc @@ -0,0 +1,53 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + 
std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + ConjugateGradient CG(1.0e-8,10000); + SchurRedBlackSolve SchurSolver(CG); + SchurSolver(Ddwf,src,result); + + Grid_finalize(); +} diff --git a/benchmarks/Grid_dwf_cg_unprec.cc b/benchmarks/Grid_dwf_cg_unprec.cc new file mode 100644 index 00000000..5c9e7ad3 --- /dev/null +++ b/benchmarks/Grid_dwf_cg_unprec.cc @@ -0,0 +1,53 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + const int Ls=8; + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeFermion src(FGrid); random(RNG5,src); + LatticeFermion result(FGrid); result=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + + std::vector U(4,UGrid); + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD M5=1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + 
+ HermitianOperator HermOp(Ddwf); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOp,src,result); + + Grid_finalize(); +} diff --git a/benchmarks/Grid_dwf_even_odd.cc b/benchmarks/Grid_dwf_even_odd.cc new file mode 100644 index 00000000..ac47bbf9 --- /dev/null +++ b/benchmarks/Grid_dwf_even_odd.cc @@ -0,0 +1,207 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout << "Grid is setup to use "< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + + LatticeFermion src (FGrid); random(RNG5,src); + LatticeFermion phi (FGrid); random(RNG5,phi); + LatticeFermion chi (FGrid); random(RNG5,chi); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); tmp=zero; + LatticeFermion err(FGrid); tmp=zero; + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + std::vector U(4,UGrid); + + // Only one non-zero (y) + Umu=zero; + for(int nn=0;nn0 ) + U[nn]=zero; + pokeIndex(Umu,U[nn],nn); + } + + RealD mass=0.1; + RealD M5 =1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + + LatticeFermion src_e (FrbGrid); + LatticeFermion src_o (FrbGrid); + LatticeFermion r_e (FrbGrid); + LatticeFermion r_o (FrbGrid); + LatticeFermion r_eo (FGrid); + LatticeFermion r_eeoo(FGrid); + + std::cout<<"=========================================================="< * = < chi | Deo^dag| phi> "< class GaugeAction - virtual const CreateGaugeState& getCreateState() const = 0; - virtual GaugeState* createState(const Q& q) const - virtual const GaugeBC& getGaugeBC() const - virtual const Set& getSet(void) 
const = 0; - virtual void deriv(P& result, const Handle< GaugeState >& state) const - virtual Double S(const Handle< GaugeState >& state) const = 0; - class LinearGaugeAction : public GaugeAction< multi1d, multi1d > - typedef multi1d P; - typedef multi1d Q; - virtual void staple(LatticeColorMatrix& result, - const Handle< GaugeState >& state, - int mu, int cb) const = 0; - */ - - // Chroma interface defining FermionAction - /* - template class FermAct4D : public FermionAction - virtual LinearOperator* linOp(Handle< FermState > state) const = 0; - virtual LinearOperator* lMdagM(Handle< FermState > state) const = 0; - virtual LinOpSystemSolver* invLinOp(Handle< FermState > state, - virtual MdagMSystemSolver* invMdagM(Handle< FermState > state, - virtual LinOpMultiSystemSolver* mInvLinOp(Handle< FermState > state, - virtual MdagMMultiSystemSolver* mInvMdagM(Handle< FermState > state, - virtual MdagMMultiSystemSolverAccumulate* mInvMdagMAcc(Handle< FermState > state, - virtual SystemSolver* qprop(Handle< FermState > state, - class DiffFermAct4D : public FermAct4D - virtual DiffLinearOperator* linOp(Handle< FermState > state) const = 0; - virtual DiffLinearOperator* lMdagM(Handle< FermState > state) const = 0; - */ } #endif diff --git a/lib/algorithms/approx/Zolotarev.cc b/lib/algorithms/approx/Zolotarev.cc index 7629bbde..c73a3436 100644 --- a/lib/algorithms/approx/Zolotarev.cc +++ b/lib/algorithms/approx/Zolotarev.cc @@ -58,6 +58,8 @@ /* Compute the partial fraction expansion coefficients (alpha) from the * factored form */ +namespace Grid { +namespace Approx { static void construct_partfrac(izd *z) { int dn = z -> dn, dd = z -> dd, type = z -> type; @@ -291,7 +293,7 @@ static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k, * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and * type = 1 for the approximation which is infinite at x = 0. 
*/ -zolotarev_data* bfm_zolotarev(PRECISION epsilon, int n, int type) { +zolotarev_data* grid_zolotarev(PRECISION epsilon, int n, int type) { INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F, l, invlambda, xi, xisq, *tv, s, opl; int m, czero, ts; @@ -412,7 +414,7 @@ zolotarev_data* bfm_zolotarev(PRECISION epsilon, int n, int type) { return zd; } -zolotarev_data* bfm_higham(PRECISION epsilon, int n) { +zolotarev_data* grid_higham(PRECISION epsilon, int n) { INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq; int m, czero; zolotarev_data *zd; @@ -502,6 +504,7 @@ zolotarev_data* bfm_higham(PRECISION epsilon, int n) { free(d); return zd; } +}} #ifdef TEST @@ -707,4 +710,6 @@ int main(int argc, char** argv) { return EXIT_SUCCESS; } + + #endif /* TEST */ diff --git a/lib/algorithms/approx/Zolotarev.h b/lib/algorithms/approx/Zolotarev.h index 3f0dc58e..869e5a89 100644 --- a/lib/algorithms/approx/Zolotarev.h +++ b/lib/algorithms/approx/Zolotarev.h @@ -1,7 +1,8 @@ /* -*- Mode: C; comment-column: 22; fill-column: 79; -*- */ #ifdef __cplusplus -extern "C" { +namespace Grid { +namespace Approx { #endif #define HVERSION Header Time-stamp: <14-OCT-2004 09:26:51.00 adk@MISSCONTRARY> @@ -76,10 +77,10 @@ typedef struct { * zolotarev_data structure. The arguments must satisfy the constraints that * epsilon > 0, n > 0, and type = 0 or 1. 
*/ -ZOLOTAREV_DATA* bfm_higham(PRECISION epsilon, int n) ; -ZOLOTAREV_DATA* bfm_zolotarev(PRECISION epsilon, int n, int type); +ZOLOTAREV_DATA* grid_higham(PRECISION epsilon, int n) ; +ZOLOTAREV_DATA* grid_zolotarev(PRECISION epsilon, int n, int type); #endif #ifdef __cplusplus -} +}} #endif diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Grid_cartesian_base.h index e93125c1..66339648 100644 --- a/lib/cartesian/Grid_cartesian_base.h +++ b/lib/cartesian/Grid_cartesian_base.h @@ -21,7 +21,7 @@ public: // Give Lattice access template friend class Lattice; - GridBase(std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; + GridBase(const std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; // Physics Grid information. diff --git a/lib/cartesian/Grid_cartesian_full.h b/lib/cartesian/Grid_cartesian_full.h index 330bbfaf..2a9e0be8 100644 --- a/lib/cartesian/Grid_cartesian_full.h +++ b/lib/cartesian/Grid_cartesian_full.h @@ -27,9 +27,9 @@ public: virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ return shift; } - GridCartesian(std::vector &dimensions, - std::vector &simd_layout, - std::vector &processor_grid + GridCartesian(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid ) : GridBase(processor_grid) { /////////////////////// diff --git a/lib/cartesian/Grid_cartesian_red_black.h b/lib/cartesian/Grid_cartesian_red_black.h index ace36edb..3a84ed49 100644 --- a/lib/cartesian/Grid_cartesian_red_black.h +++ b/lib/cartesian/Grid_cartesian_red_black.h @@ -81,28 +81,28 @@ public: } }; - GridRedBlackCartesian(GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {}; + GridRedBlackCartesian(const GridBase *base) : GridRedBlackCartesian(base->_fdimensions,base->_simd_layout,base->_processors) {}; - GridRedBlackCartesian(std::vector &dimensions, - std::vector &simd_layout, - std::vector &processor_grid, - 
std::vector &checker_dim_mask, + GridRedBlackCartesian(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid, + const std::vector &checker_dim_mask, int checker_dim ) : GridBase(processor_grid) { Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); } - GridRedBlackCartesian(std::vector &dimensions, - std::vector &simd_layout, - std::vector &processor_grid) : GridBase(processor_grid) + GridRedBlackCartesian(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid) : GridBase(processor_grid) { std::vector checker_dim_mask(dimensions.size(),1); Init(dimensions,simd_layout,processor_grid,checker_dim_mask,0); } - void Init(std::vector &dimensions, - std::vector &simd_layout, - std::vector &processor_grid, - std::vector &checker_dim_mask, + void Init(const std::vector &dimensions, + const std::vector &simd_layout, + const std::vector &processor_grid, + const std::vector &checker_dim_mask, int checker_dim) { /////////////////////// diff --git a/lib/communicator/Grid_communicator_base.h b/lib/communicator/Grid_communicator_base.h index 47c1f525..61e19993 100644 --- a/lib/communicator/Grid_communicator_base.h +++ b/lib/communicator/Grid_communicator_base.h @@ -27,7 +27,7 @@ class CartesianCommunicator { #endif // Constructor - CartesianCommunicator(std::vector &pdimensions_in); + CartesianCommunicator(const std::vector &pdimensions_in); // Wraps MPI_Cart routines void ShiftedRanks(int dim,int shift,int & source, int & dest); diff --git a/lib/communicator/Grid_communicator_mpi.cc b/lib/communicator/Grid_communicator_mpi.cc index 6ef05c3d..5dd34705 100644 --- a/lib/communicator/Grid_communicator_mpi.cc +++ b/lib/communicator/Grid_communicator_mpi.cc @@ -5,7 +5,7 @@ namespace Grid { // Should error check all MPI calls. 
-CartesianCommunicator::CartesianCommunicator(std::vector &processors) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _ndimension = processors.size(); std::vector periodic(_ndimension,1); diff --git a/lib/communicator/Grid_communicator_none.cc b/lib/communicator/Grid_communicator_none.cc index 90eb26cc..d7eb9453 100644 --- a/lib/communicator/Grid_communicator_none.cc +++ b/lib/communicator/Grid_communicator_none.cc @@ -1,7 +1,7 @@ #include "Grid.h" namespace Grid { -CartesianCommunicator::CartesianCommunicator(std::vector &processors) +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _processors = processors; _ndimension = processors.size(); diff --git a/lib/qcd/LinalgUtils.h b/lib/qcd/LinalgUtils.h new file mode 100644 index 00000000..2b83d115 --- /dev/null +++ b/lib/qcd/LinalgUtils.h @@ -0,0 +1,113 @@ +#ifndef GRID_QCD_LINALG_UTILS_H +#define GRID_QCD_LINALG_UTILS_H + +namespace Grid{ +namespace QCD{ +//////////////////////////////////////////////////////////////////////// +//This file brings additional linear combination assist that is helpful +//to QCD such as chiral projectors and spin matrices applied to one of the inputs. 
+//These routines support five-D chiral fermions and contain s-subslice indexing +//on the 5d (rb4d) checkerboarded lattices +//////////////////////////////////////////////////////////////////////// +template +void axpby_ssp(Lattice &z, RealD a,const Lattice &x,RealD b,const Lattice &y,int s,int sp) +{ + z.checkerboard = x.checkerboard; + conformable(x,y); + conformable(x,z); + GridBase *grid=x._grid; + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp = a*x._odata[ss+s]+b*y._odata[ss+sp]; + vstream(z._odata[ss+s],tmp); + } +} + +template +void ag5xpby_ssp(Lattice &z,RealD a,const Lattice &x,RealD b,const Lattice &y,int s,int sp) +{ + z.checkerboard = x.checkerboard; + conformable(x,y); + conformable(x,z); + GridBase *grid=x._grid; + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp; + multGamma5(tmp(),a*x._odata[ss+s]()); + tmp = tmp + b*y._odata[ss+sp]; + vstream(z._odata[ss+s],tmp); + } +} + +template +void axpbg5y_ssp(Lattice &z,RealD a,const Lattice &x,RealD b,const Lattice &y,int s,int sp) +{ + z.checkerboard = x.checkerboard; + conformable(x,y); + conformable(x,z); + GridBase *grid=x._grid; + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp; + multGamma5(tmp(),b*y._odata[ss+sp]()); + tmp = tmp + a*x._odata[ss+s]; + vstream(z._odata[ss+s],tmp); + } +} + +template +void ag5xpbg5y_ssp(Lattice &z,RealD a,const Lattice &x,RealD b,const Lattice &y,int s,int sp) +{ + z.checkerboard = x.checkerboard; + conformable(x,y); + conformable(x,z); + GridBase *grid=x._grid; + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp1; + vobj tmp2; + tmp1 = a*x._odata[ss+s]+b*y._odata[ss+sp]; + multGamma5(tmp2(),tmp1()); + vstream(z._odata[ss+s],tmp2); + } +} + +template +void axpby_ssp_pminus(Lattice &z,RealD a,const Lattice &x,RealD b,const Lattice 
&y,int s,int sp) +{ + z.checkerboard = x.checkerboard; + conformable(x,y); + conformable(x,z); + GridBase *grid=x._grid; + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp; + spProj5m(tmp,y._odata[ss+sp]); + tmp = a*x._odata[ss+s]+b*tmp; + vstream(z._odata[ss+s],tmp); + } +} + +template +void axpby_ssp_pplus(Lattice &z,RealD a,const Lattice &x,RealD b,const Lattice &y,int s,int sp) +{ + z.checkerboard = x.checkerboard; + conformable(x,y); + conformable(x,z); + GridBase *grid=x._grid; + int Ls = grid->_rdimensions[0]; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + vobj tmp; + spProj5p(tmp,y._odata[ss+sp]); + tmp = a*x._odata[ss+s]+b*tmp; + vstream(z._odata[ss+s],tmp); + } +} +}} +#endif diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index 7c45eb23..4b5edc5c 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -307,8 +307,10 @@ namespace QCD { } //namespace QCD } // Grid +#include #include #include +#include #include #endif diff --git a/lib/qcd/SpaceTimeGrid.cc b/lib/qcd/SpaceTimeGrid.cc new file mode 100644 index 00000000..284c5771 --- /dev/null +++ b/lib/qcd/SpaceTimeGrid.cc @@ -0,0 +1,52 @@ +#include + +namespace Grid { + namespace QCD { + +///////////////////////////////////////////////////////////////// +// Public interface +///////////////////////////////////////////////////////////////// +GridCartesian *SpaceTimeGrid::makeFourDimGrid(const std::vector & latt,const std::vector &simd,const std::vector &mpi) +{ + return new GridCartesian(latt,simd,mpi); +} +GridRedBlackCartesian *SpaceTimeGrid::makeFourDimRedBlackGrid(const GridCartesian *FourDimGrid) +{ + return new GridRedBlackCartesian(FourDimGrid); +} + +GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian *FourDimGrid) +{ + int N4=FourDimGrid->_ndimension; + + std::vector latt5(1,Ls); + std::vector simd5(1,1); + std::vector mpi5(1,1); + + for(int d=0;d_fdimensions[d]); + simd5.push_back(FourDimGrid->_simd_layout[d]); 
+ mpi5.push_back(FourDimGrid->_processors[d]); + } + return new GridCartesian(latt5,simd5,mpi5); +} + +GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridCartesian *FourDimGrid) +{ + int N4=FourDimGrid->_ndimension; + int cbd=1; + std::vector latt5(1,Ls); + std::vector simd5(1,1); + std::vector mpi5(1,1); + std::vector cb5(1,0); + + for(int d=0;d_fdimensions[d]); + simd5.push_back(FourDimGrid->_simd_layout[d]); + mpi5.push_back(FourDimGrid->_processors[d]); + cb5.push_back( 1); + } + return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd); +} + +}} diff --git a/lib/qcd/SpaceTimeGrid.h b/lib/qcd/SpaceTimeGrid.h new file mode 100644 index 00000000..0b386a0e --- /dev/null +++ b/lib/qcd/SpaceTimeGrid.h @@ -0,0 +1,18 @@ +#ifndef GRID_QCD_SPACE_TIME_GRID_H +#define GRID_QCD_SPACE_TIME_GRID_H +namespace Grid { +namespace QCD { + +class SpaceTimeGrid { + public: + + static GridCartesian *makeFourDimGrid(const std::vector & latt,const std::vector &simd,const std::vector &mpi); + static GridRedBlackCartesian *makeFourDimRedBlackGrid (const GridCartesian *FourDimGrid); + static GridCartesian *makeFiveDimGrid (int Ls,const GridCartesian *FourDimGrid); + static GridRedBlackCartesian *makeFiveDimRedBlackGrid(int Ls,const GridCartesian *FourDimGrid); + +}; + +}} + +#endif diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index c4e8a2f0..acbf027c 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -1,10 +1,80 @@ #ifndef GRID_QCD_ACTIONS_H #define GRID_QCD_ACTIONS_H -#include -#include -#include + +// Some reorganisation likely required as both Chroma and IroIro +// are separating the concept of the operator from that of action. +// +// The FermAction contains methods to create +// +// * Linear operators (Hermitian and non-hermitian) .. my LinearOperator +// * System solvers (Hermitian and non-hermitian) .. my OperatorFunction +// * MultiShift System solvers (Hermitian and non-hermitian) .. 
my OperatorFunction + + +//////////////////////////////////////////// +// Abstract base interface +//////////////////////////////////////////// +#include + +//////////////////////////////////////////// +// Utility functions +//////////////////////////////////////////// +#include //used by all wilson type fermions +#include //used by all wilson type fermions + +//////////////////////////////////////////// +// 4D formulations +//////////////////////////////////////////// #include -#include +//#include + +//////////////////////////////////////////// +// 5D formulations +//////////////////////////////////////////// +#include // used by all 5d overlap types +#include +#include +//#include + +#include +//#include + + + // Chroma interface defining FermionAction + /* + template class FermAct4D : public FermionAction + virtual LinearOperator* linOp(Handle< FermState > state) const = 0; + virtual LinearOperator* lMdagM(Handle< FermState > state) const = 0; + virtual LinOpSystemSolver* invLinOp(Handle< FermState > state, + virtual MdagMSystemSolver* invMdagM(Handle< FermState > state, + virtual LinOpMultiSystemSolver* mInvLinOp(Handle< FermState > state, + virtual MdagMMultiSystemSolver* mInvMdagM(Handle< FermState > state, + virtual MdagMMultiSystemSolverAccumulate* mInvMdagMAcc(Handle< FermState > state, + virtual SystemSolver* qprop(Handle< FermState > state, + class DiffFermAct4D : public FermAct4D + virtual DiffLinearOperator* linOp(Handle< FermState > state) const = 0; + virtual DiffLinearOperator* lMdagM(Handle< FermState > state) const = 0; + */ + + + // Chroma interface defining GaugeAction + /* + template class GaugeAction + virtual const CreateGaugeState& getCreateState() const = 0; + virtual GaugeState* createState(const Q& q) const + virtual const GaugeBC& getGaugeBC() const + virtual const Set& getSet(void) const = 0; + virtual void deriv(P& result, const Handle< GaugeState >& state) const + virtual Double S(const Handle< GaugeState >& state) const = 0; + + 
class LinearGaugeAction : public GaugeAction< multi1d, multi1d > + typedef multi1d P; + typedef multi1d Q; + virtual void staple(LatticeColorMatrix& result, + const Handle< GaugeState >& state, + int mu, int cb) const = 0; + */ + #endif diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc new file mode 100644 index 00000000..263cc28b --- /dev/null +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -0,0 +1,235 @@ +#include +namespace Grid { +namespace QCD { + + CayleyFermion5D::CayleyFermion5D(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5) : + WilsonFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_M5), + mass(_mass) + { + std::cout << "Constructing a CayleyFermion5D"< D1+^dag P+ D2-^dag + //D2- P+ D2+ P-D1-^dag D2+dag + + LatticeFermion Din(psi._grid); + // Apply Dw + DW(psi,Din,DaggerYes); + + for(int s=0;s=0;s--){ + axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls] + } + } + + void CayleyFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi) + { + // Apply (U^{\prime})^{-dagger} + axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0] + for (int s=1;s=0;s--){ + axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls] + } + } + +} +} + diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h new file mode 100644 index 00000000..857ac124 --- /dev/null +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -0,0 +1,61 @@ +#ifndef GRID_QCD_CAYLEY_FERMION_H +#define GRID_QCD_CAYLEY_FERMION_H + +namespace Grid { + + namespace QCD { + + class CayleyFermion5D : public WilsonFermion5D + { + public: + + // override multiply + virtual RealD M (const LatticeFermion &in, LatticeFermion &out); + virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out); + + // half 
checkerboard operations + virtual void Meooe (const LatticeFermion &in, LatticeFermion &out); + virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); + + // protected: + + Approx::zolotarev_data *zdata; + + RealD mass; + // Cayley form Moebius (tanh and zolotarev) + std::vector omega; + std::vector bs; // S dependent coeffs + std::vector cs; + std::vector as; + // For preconditioning Cayley form + std::vector bee; + std::vector cee; + std::vector aee; + std::vector beo; + std::vector ceo; + std::vector aeo; + // LDU factorisation of the eeoo matrix + std::vector lee; + std::vector leem; + std::vector uee; + std::vector ueem; + std::vector dee; + + // Constructors + CayleyFermion5D(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5); + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc new file mode 100644 index 00000000..c281b486 --- /dev/null +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc @@ -0,0 +1,119 @@ +#include + +namespace Grid { + + namespace QCD { + + RealD ContinuedFractionFermion5D::M (const LatticeFermion &psi, LatticeFermion &chi) + { + LatticeFermion D(psi._grid); + + DW(psi,D,DaggerNo); + + int sign=1; + for(int s=0;sM5)*scale; + + int sign=1; + for(int s=0;smass)/(1-this->mass); + ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1); + ag5xpby_ssp(chi,R,psi,1.0,chi,s,s); + } else { + ag5xpby_ssp(chi,cc[s]*Beta[s]*sign*dw_diag,psi,sqrt_cc[s],psi,s,s+1); + 
axpby_ssp(chi,1.0,chi,sqrt_cc[s-1],psi,s,s-1); + } + sign=-sign; + } + } + + void ContinuedFractionFermion5D::MooeeDag (const LatticeFermion &psi, LatticeFermion &chi) + { + Mooee(psi,chi); + } + void ContinuedFractionFermion5D::MooeeInv (const LatticeFermion &psi, LatticeFermion &chi) + { + // Apply Linv + axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); + for(int s=1;sLs-1,this->Ls-1); + for(int s=Ls-2;s>=0;s--){ + axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1); + } + } + void ContinuedFractionFermion5D::MooeeInvDag (const LatticeFermion &psi, LatticeFermion &chi) + { + MooeeInv(psi,chi); + } + + // Constructors + ContinuedFractionFermion5D::ContinuedFractionFermion5D( + LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5) : + WilsonFermion5D(_Umu, + FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid,M5), + mass(_mass) + { + } + + } +} + diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h new file mode 100644 index 00000000..7f5c022a --- /dev/null +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -0,0 +1,53 @@ +#ifndef GRID_QCD_CONTINUED_FRACTION_H +#define GRID_QCD_CONTINUED_FRACTION_H + +namespace Grid { + + namespace QCD { + + class ContinuedFractionFermion5D : public WilsonFermion5D + { + public: + + // override multiply + virtual RealD M (const LatticeFermion &in, LatticeFermion &out); + virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out); + + // half checkerboard operaions + virtual void Meooe (const LatticeFermion &in, LatticeFermion &out); + virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInv 
(const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); + + private: + + Approx::zolotarev_data *zdata; + + // Cont frac + RealD mass; + RealD R; + RealD scale; + std::vector Beta; + std::vector cc;; + std::vector cc_d;; + std::vector sqrt_cc; + std::vector See; + std::vector Aee; + + // Constructors + ContinuedFractionFermion5D(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5); + + }; + + + } +} + +#endif diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h new file mode 100644 index 00000000..2abb6eb2 --- /dev/null +++ b/lib/qcd/action/fermion/DomainWallFermion.h @@ -0,0 +1,118 @@ +#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H +#define GRID_QCD_DOMAIN_WALL_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class DomainWallFermion : public CayleyFermion5D + { + public: + + // Constructors + DomainWallFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5) + + { + RealD eps = 1.0; + + zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham + assert(zdata->n==this->Ls); + + /////////////////////////////////////////////////////////// + // The Cayley coeffs (unprec) + /////////////////////////////////////////////////////////// + this->omega.resize(this->Ls); + this->bs.resize(this->Ls); + this->cs.resize(this->Ls); + this->as.resize(this->Ls); + + for(int i=0; i < this->Ls; i++){ + this->as[i] = 1.0; + this->omega[i] = ((double)zdata -> gamma[i]); + double bb=1.0; + this->bs[i] = 0.5*(bb/(this->omega[i]) + 1.0); + 
this->cs[i] = 0.5*(bb/(this->omega[i]) - 1.0); + } + + //////////////////////////////////////////////////////// + // Constants for the preconditioned matrix Cayley form + //////////////////////////////////////////////////////// + this->bee.resize(this->Ls); + this->cee.resize(this->Ls); + this->beo.resize(this->Ls); + this->ceo.resize(this->Ls); + + for(int i=0;iLs;i++){ + this->bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0); + this->cee[i]=as[i]*(1.0-cs[i]*(4.0-M5)); + this->beo[i]=as[i]*bs[i]; + this->ceo[i]=-as[i]*cs[i]; + } + + aee.resize(this->Ls); + aeo.resize(this->Ls); + for(int i=0;iLs;i++){ + aee[i]=cee[i]; + aeo[i]=ceo[i]; + } + + ////////////////////////////////////////// + // LDU decomposition of eeoo + ////////////////////////////////////////// + dee.resize(this->Ls); + lee.resize(this->Ls); + leem.resize(this->Ls); + uee.resize(this->Ls); + ueem.resize(this->Ls); + + for(int i=0;iLs;i++){ + + dee[i] = bee[i]; + + if ( i < this->Ls-1 ) { + + lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column + + leem[i]=this->mass*cee[this->Ls-1]/bee[0]; + for(int j=0;jmass; + for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j]; + ueem[i]*= aee[0]/bee[0]; + + } else { + lee[i] =0.0; + leem[i]=0.0; + uee[i] =0.0; + ueem[i]=0.0; + } + } + + { + double delta_d=mass*cee[this->Ls-1]; + for(int j=0;jLs-1;j++) delta_d *= cee[j]/bee[j]; + dee[this->Ls-1] += delta_d; + } + } + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/FermionAction.h b/lib/qcd/action/fermion/FermionOperator.h similarity index 92% rename from lib/qcd/action/fermion/FermionAction.h rename to lib/qcd/action/fermion/FermionOperator.h index 1b05174b..47c47478 100644 --- a/lib/qcd/action/fermion/FermionAction.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -1,5 +1,5 @@ -#ifndef GRID_QCD_WILSON_DOP_H -#define GRID_QCD_WILSON_DOP_H +#ifndef GRID_QCD_FERMION_OPERATOR_H +#define GRID_QCD_FERMION_OPERATOR_H namespace Grid { @@ -11,7 +11,7 @@ namespace Grid { // Think about multiple representations 
////////////////////////////////////////////////////////////////////////////// template - class FermionAction : public CheckerBoardedSparseMatrixBase + class FermionOperator : public CheckerBoardedSparseMatrixBase { public: @@ -40,6 +40,7 @@ namespace Grid { virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0; virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0; + }; } diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc new file mode 100644 index 00000000..21f62a75 --- /dev/null +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc @@ -0,0 +1,47 @@ +#ifndef GRID_QCD_PARTIAL_FRACTION_H +#define GRID_QCD_PARTIAL_FRACTION_H + +namespace Grid { + + namespace QCD { + + class PartialFractionFermion5D : public WilsonFermion5D + { + public: + + // override multiply + virtual RealD M (const LatticeFermion &in, LatticeFermion &out); + virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out); + + // half checkerboard operaions + virtual void Meooe (const LatticeFermion &in, LatticeFermion &out); + virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); + + private: + + zolotarev_data *zdata; + + // Part frac + double R=(1+this->mass)/(1-this->mass); + std::vector p; + std::vector q; + + // Constructors + PartialFractionFermion5D(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5); + + }; + + + } +} + +#endif diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h 
b/lib/qcd/action/fermion/PartialFractionFermion5D.h new file mode 100644 index 00000000..c51aa563 --- /dev/null +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h @@ -0,0 +1,49 @@ +#ifndef GRID_QCD_PARTIAL_FRACTION_H +#define GRID_QCD_PARTIAL_FRACTION_H + +namespace Grid { + + namespace QCD { + + class PartialFractionFermion5D : public WilsonFermion5D + { + public: + + // override multiply + virtual RealD M (const LatticeFermion &in, LatticeFermion &out); + virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out); + + // half checkerboard operaions + virtual void Meooe (const LatticeFermion &in, LatticeFermion &out); + virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); + virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); + + private: + + virtual void PartialFractionCoefficients(void); + + zolotarev_data *zdata; + + // Part frac + double R=(1+this->mass)/(1-this->mass); + std::vector p; + std::vector q; + + // Constructors + PartialFractionFermion5D(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5); + + }; + + + } +} + +#endif diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index aa30a7fa..9f2da251 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -9,9 +9,9 @@ const std::vector WilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1}); int WilsonFermion::HandOptDslash; WilsonFermion::WilsonFermion(LatticeGaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - double _mass) : + GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, + RealD 
_mass) : _grid(&Fgrid), _cbgrid(&Hgrid), Stencil (&Fgrid,npoint,Even,directions,displacements), diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 5c208131..6040d328 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -5,7 +5,7 @@ namespace Grid { namespace QCD { - class WilsonFermion : public FermionAction + class WilsonFermion : public FermionOperator { public: @@ -44,7 +44,7 @@ namespace Grid { int dag); // Constructor - WilsonFermion(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,double _mass); + WilsonFermion(LatticeGaugeField &_Umu,GridCartesian &Fgrid,GridRedBlackCartesian &Hgrid,RealD _mass); // DoubleStore void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu); @@ -57,7 +57,7 @@ namespace Grid { protected: - double mass; + RealD mass; GridBase * _grid; GridBase * _cbgrid; diff --git a/lib/qcd/action/fermion/FiveDimWilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc similarity index 68% rename from lib/qcd/action/fermion/FiveDimWilsonFermion.cc rename to lib/qcd/action/fermion/WilsonFermion5D.cc index 43645899..d22701b0 100644 --- a/lib/qcd/action/fermion/FiveDimWilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -4,18 +4,18 @@ namespace Grid { namespace QCD { // S-direction is INNERMOST and takes no part in the parity. - const std::vector FiveDimWilsonFermion::directions ({1,2,3,4, 1, 2, 3, 4}); - const std::vector FiveDimWilsonFermion::displacements({1,1,1,1,-1,-1,-1,-1}); + const std::vector WilsonFermion5D::directions ({1,2,3,4, 1, 2, 3, 4}); + const std::vector WilsonFermion5D::displacements({1,1,1,1,-1,-1,-1,-1}); - int FiveDimWilsonFermion::HandOptDslash; + int WilsonFermion5D::HandOptDslash; // 5d lattice for DWF. 
- FiveDimWilsonFermion::FiveDimWilsonFermion(LatticeGaugeField &_Umu, + WilsonFermion5D::WilsonFermion5D(LatticeGaugeField &_Umu, GridCartesian &FiveDimGrid, GridRedBlackCartesian &FiveDimRedBlackGrid, GridCartesian &FourDimGrid, GridRedBlackCartesian &FourDimRedBlackGrid, - double _mass) : + RealD _M5) : _FiveDimGrid(&FiveDimGrid), _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), _FourDimGrid(&FourDimGrid), @@ -23,7 +23,7 @@ namespace QCD { Stencil (_FiveDimGrid,npoint,Even,directions,displacements), StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd - mass(_mass), + M5(_M5), Umu(_FourDimGrid), UmuEven(_FourDimRedBlackGrid), UmuOdd (_FourDimRedBlackGrid), @@ -70,7 +70,7 @@ namespace QCD { pickCheckerboard(Even,UmuEven,Umu); pickCheckerboard(Odd ,UmuOdd,Umu); } -void FiveDimWilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu) +void WilsonFermion5D::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu) { conformable(Uds._grid,GaugeGrid()); conformable(Umu._grid,GaugeGrid()); @@ -82,60 +82,9 @@ void FiveDimWilsonFermion::DoubleStore(LatticeDoubledGaugeField &Uds,const Latti pokeIndex(Uds,U,mu+4); } } - -RealD FiveDimWilsonFermion::M(const LatticeFermion &in, LatticeFermion &out) -{ - out.checkerboard=in.checkerboard; - Dhop(in,out,DaggerNo); - return axpy_norm(out,5.0-M5,in,out); -} -RealD FiveDimWilsonFermion::Mdag(const LatticeFermion &in, LatticeFermion &out) -{ - out.checkerboard=in.checkerboard; - Dhop(in,out,DaggerYes); - return axpy_norm(out,5.0-M5,in,out); -} -void FiveDimWilsonFermion::Meooe(const LatticeFermion &in, LatticeFermion &out) -{ - if ( in.checkerboard == Odd ) { - DhopEO(in,out,DaggerNo); - } else { - DhopOE(in,out,DaggerNo); - } -} -void FiveDimWilsonFermion::MeooeDag(const LatticeFermion &in, LatticeFermion &out) -{ - if ( in.checkerboard == Odd ) { - 
DhopEO(in,out,DaggerYes); - } else { - DhopOE(in,out,DaggerYes); - } -} -void FiveDimWilsonFermion::Mooee(const LatticeFermion &in, LatticeFermion &out) -{ - out.checkerboard = in.checkerboard; - out = (5.0-M5)*in; - return ; -} -void FiveDimWilsonFermion::MooeeDag(const LatticeFermion &in, LatticeFermion &out) -{ - out.checkerboard = in.checkerboard; - Mooee(in,out); -} -void FiveDimWilsonFermion::MooeeInv(const LatticeFermion &in, LatticeFermion &out) -{ - out.checkerboard = in.checkerboard; - out = (1.0/(5.0-M5))*in; - return ; -} -void FiveDimWilsonFermion::MooeeInvDag(const LatticeFermion &in, LatticeFermion &out) -{ - out.checkerboard = in.checkerboard; - MooeeInv(in,out); -} -void FiveDimWilsonFermion::DhopInternal(CartesianStencil & st, LebesgueOrder &lo, - LatticeDoubledGaugeField & U, - const LatticeFermion &in, LatticeFermion &out,int dag) +void WilsonFermion5D::DhopInternal(CartesianStencil & st, LebesgueOrder &lo, + LatticeDoubledGaugeField & U, + const LatticeFermion &in, LatticeFermion &out,int dag) { assert((dag==DaggerNo) ||(dag==DaggerYes)); @@ -150,19 +99,21 @@ void FiveDimWilsonFermion::DhopInternal(CartesianStencil & st, LebesgueOrder &lo // - 8 linear access unit stride streams per thread for Fermion for hw prefetchable. 
if ( dag == DaggerYes ) { if( HandOptDslash ) { - for(int ss=0;ssoSites();ss++){ - int sU=lo.Reorder(ss); PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ for(int s=0;soSites();ss++){ - int sU=lo.Reorder(ss); PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ for(int s=0;soSites();ss++){ - int sU=lo.Reorder(ss); for(int s=0;soSites();ss++){ - int sU=lo.Reorder(ss); PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss++){ for(int s=0;s + class WilsonFermion5D : public FermionOperator { public: /////////////////////////////////////////////////////////////// @@ -26,19 +26,21 @@ namespace Grid { GridBase *FermionGrid(void) { return _FiveDimGrid;} GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} - // override multiply - virtual RealD M (const LatticeFermion &in, LatticeFermion &out); - virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out); + // full checkerboard operations; leave unimplemented as abstract for now + //virtual RealD M (const LatticeFermion &in, LatticeFermion &out)=0; + //virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out)=0; - // half checkerboard operaions - virtual void Meooe (const LatticeFermion &in, LatticeFermion &out); - virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out); - virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); - virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); - virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); - virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); + // half checkerboard operations; leave unimplemented as abstract for now + // virtual void Meooe (const LatticeFermion &in, LatticeFermion &out)=0; + // virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out)=0; + // virtual void Mooee (const LatticeFermion &in, LatticeFermion &out)=0; + // virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out)=0; + // virtual void MooeeInv (const LatticeFermion &in, 
LatticeFermion &out)=0; + // virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out)=0; - // non-hermitian hopping term; half cb or both + // Implement hopping term non-hermitian hopping term; half cb or both + // Implement s-diagonal DW + void DW (const LatticeFermion &in, LatticeFermion &out,int dag); void Dhop (const LatticeFermion &in, LatticeFermion &out,int dag); void DhopOE(const LatticeFermion &in, LatticeFermion &out,int dag); void DhopEO(const LatticeFermion &in, LatticeFermion &out,int dag); @@ -54,12 +56,12 @@ namespace Grid { int dag); // Constructors - FiveDimWilsonFermion(LatticeGaugeField &_Umu, + WilsonFermion5D(LatticeGaugeField &_Umu, GridCartesian &FiveDimGrid, GridRedBlackCartesian &FiveDimRedBlackGrid, GridCartesian &FourDimGrid, GridRedBlackCartesian &FourDimRedBlackGrid, - double _mass); + double _M5); // DoubleStore void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu); @@ -82,7 +84,6 @@ namespace Grid { static const std::vector displacements; double M5; - double mass; int Ls; //Defines the stencils for even and odd diff --git a/lib/stencil/Grid_stencil_common.cc b/lib/stencil/Grid_stencil_common.cc index f0f8c581..7f894faf 100644 --- a/lib/stencil/Grid_stencil_common.cc +++ b/lib/stencil/Grid_stencil_common.cc @@ -52,8 +52,8 @@ namespace Grid { // up a table containing the npoint "neighbours" and whether they // live in lattice or a comms buffer. 
if ( !comm_dim ) { - sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Even); - sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Odd); + sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); + sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); if ( sshift[0] == sshift[1] ) { Local(point,dimension,shift,0x3); @@ -63,8 +63,8 @@ namespace Grid { } } else { // All permute extract done in comms phase prior to Stencil application // So tables are the same whether comm_dim or splice_dim - sshift[0] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Even); - sshift[1] = _grid->CheckerBoardShift(_checkerboard,dimension,shift,Odd); + sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); + sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); if ( sshift[0] == sshift[1] ) { Comms(point,dimension,shift,0x3); } else { @@ -96,7 +96,7 @@ namespace Grid { int cb= (cbmask==0x2)? Odd : Even; - int sshift = _grid->CheckerBoardShift(_checkerboard,dimension,shift,cb); + int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb); int sx = (x+sshift)%rd; int permute_slice=0; @@ -134,7 +134,7 @@ namespace Grid { // send to one or more remote nodes. int cb= (cbmask==0x2)? Odd : Even; - int sshift= _grid->CheckerBoardShift(_checkerboard,dimension,shift,cb); + int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb); for(int x=0;x Date: Tue, 2 Jun 2015 17:25:26 +0100 Subject: [PATCH 04/22] Reorg; moving prec/unprec/schur CG for Wilson and DWF into tests as these are really tests and not benchmarks (no performance reports, only convergence test). 
--- benchmarks/Makefile.am | 39 ++------------- {benchmarks => tests}/Grid_dwf_cg_prec.cc | 0 {benchmarks => tests}/Grid_dwf_cg_schur.cc | 0 {benchmarks => tests}/Grid_dwf_cg_unprec.cc | 0 {benchmarks => tests}/Grid_dwf_even_odd.cc | 0 {benchmarks => tests}/Grid_wilson_cg_prec.cc | 0 {benchmarks => tests}/Grid_wilson_cg_schur.cc | 0 .../Grid_wilson_cg_unprec.cc | 0 {benchmarks => tests}/Grid_wilson_evenodd.cc | 0 tests/Makefile.am | 50 ++++++++++++++++++- 10 files changed, 52 insertions(+), 37 deletions(-) rename {benchmarks => tests}/Grid_dwf_cg_prec.cc (100%) rename {benchmarks => tests}/Grid_dwf_cg_schur.cc (100%) rename {benchmarks => tests}/Grid_dwf_cg_unprec.cc (100%) rename {benchmarks => tests}/Grid_dwf_even_odd.cc (100%) rename {benchmarks => tests}/Grid_wilson_cg_prec.cc (100%) rename {benchmarks => tests}/Grid_wilson_cg_schur.cc (100%) rename {benchmarks => tests}/Grid_wilson_cg_unprec.cc (100%) rename {benchmarks => tests}/Grid_wilson_evenodd.cc (100%) diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index e92f2e84..e1e092f4 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -10,53 +10,20 @@ bin_PROGRAMS = \ Grid_memory_bandwidth \ Grid_su3 \ Grid_wilson \ - Grid_wilson_evenodd \ - Grid_wilson_cg_unprec \ - Grid_wilson_cg_prec \ - Grid_wilson_cg_schur \ - Grid_dwf\ - Grid_dwf_even_odd\ - Grid_dwf_cg_unprec\ - Grid_dwf_cg_prec\ - Grid_dwf_cg_schur + Grid_dwf Grid_comms_SOURCES = Grid_comms.cc Grid_comms_LDADD = -lGrid -Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc -Grid_su3_LDADD = -lGrid - Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc Grid_memory_bandwidth_LDADD = -lGrid +Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc +Grid_su3_LDADD = -lGrid Grid_wilson_SOURCES = Grid_wilson.cc Grid_wilson_LDADD = -lGrid -Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc -Grid_wilson_evenodd_LDADD = -lGrid - -Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc 
-Grid_wilson_cg_unprec_LDADD = -lGrid - -Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc -Grid_wilson_cg_prec_LDADD = -lGrid - -Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc -Grid_wilson_cg_schur_LDADD = -lGrid - Grid_dwf_SOURCES = Grid_dwf.cc Grid_dwf_LDADD = -lGrid -Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc -Grid_dwf_even_odd_LDADD = -lGrid - -Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc -Grid_dwf_cg_unprec_LDADD = -lGrid - -Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc -Grid_dwf_cg_prec_LDADD = -lGrid - -Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc -Grid_dwf_cg_schur_LDADD = -lGrid - diff --git a/benchmarks/Grid_dwf_cg_prec.cc b/tests/Grid_dwf_cg_prec.cc similarity index 100% rename from benchmarks/Grid_dwf_cg_prec.cc rename to tests/Grid_dwf_cg_prec.cc diff --git a/benchmarks/Grid_dwf_cg_schur.cc b/tests/Grid_dwf_cg_schur.cc similarity index 100% rename from benchmarks/Grid_dwf_cg_schur.cc rename to tests/Grid_dwf_cg_schur.cc diff --git a/benchmarks/Grid_dwf_cg_unprec.cc b/tests/Grid_dwf_cg_unprec.cc similarity index 100% rename from benchmarks/Grid_dwf_cg_unprec.cc rename to tests/Grid_dwf_cg_unprec.cc diff --git a/benchmarks/Grid_dwf_even_odd.cc b/tests/Grid_dwf_even_odd.cc similarity index 100% rename from benchmarks/Grid_dwf_even_odd.cc rename to tests/Grid_dwf_even_odd.cc diff --git a/benchmarks/Grid_wilson_cg_prec.cc b/tests/Grid_wilson_cg_prec.cc similarity index 100% rename from benchmarks/Grid_wilson_cg_prec.cc rename to tests/Grid_wilson_cg_prec.cc diff --git a/benchmarks/Grid_wilson_cg_schur.cc b/tests/Grid_wilson_cg_schur.cc similarity index 100% rename from benchmarks/Grid_wilson_cg_schur.cc rename to tests/Grid_wilson_cg_schur.cc diff --git a/benchmarks/Grid_wilson_cg_unprec.cc b/tests/Grid_wilson_cg_unprec.cc similarity index 100% rename from benchmarks/Grid_wilson_cg_unprec.cc rename to tests/Grid_wilson_cg_unprec.cc diff --git a/benchmarks/Grid_wilson_evenodd.cc b/tests/Grid_wilson_evenodd.cc similarity index 
100% rename from benchmarks/Grid_wilson_evenodd.cc rename to tests/Grid_wilson_evenodd.cc diff --git a/tests/Makefile.am b/tests/Makefile.am index 7ef4eb1f..6f92a94d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -5,7 +5,30 @@ AM_LDFLAGS = -L$(top_builddir)/lib # # Test code # -bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_cshift_red_black +bin_PROGRAMS = Grid_main \ + Grid_simd \ + Grid_gamma \ + Grid_cshift \ + Grid_cshift_red_black \ + Grid_stencil \ + Grid_nersc_io \ + Grid_rng \ + Grid_remez \ + Grid_rng_fixed \ + Grid_wilson_evenodd \ + Grid_wilson_cg_unprec \ + Grid_wilson_cg_prec \ + Grid_wilson_cg_schur \ + Grid_dwf_even_odd\ + Grid_dwf_cg_unprec\ + Grid_dwf_cg_prec\ + Grid_dwf_cg_schur + +test: + for f in $bin_PROGRAMS + do + ./$f > $f.log + done Grid_main_SOURCES = Grid_main.cc Grid_main_LDADD = -lGrid @@ -39,3 +62,28 @@ Grid_simd_LDADD = -lGrid #Grid_simd_new_SOURCES = Grid_simd_new.cc #Grid_simd_new_LDADD = -lGrid + +Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc +Grid_wilson_evenodd_LDADD = -lGrid + +Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc +Grid_wilson_cg_unprec_LDADD = -lGrid + +Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc +Grid_wilson_cg_prec_LDADD = -lGrid + +Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc +Grid_wilson_cg_schur_LDADD = -lGrid + +Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc +Grid_dwf_even_odd_LDADD = -lGrid + +Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc +Grid_dwf_cg_unprec_LDADD = -lGrid + +Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc +Grid_dwf_cg_prec_LDADD = -lGrid + +Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc +Grid_dwf_cg_schur_LDADD = -lGrid + From 68e26140ee0b497286a3d661958583d10178178c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 09:36:26 +0100 Subject: [PATCH 05/22] Mobius Caley form, Mobius Zolotarev operators. 
Pass Even Odd vs unprec test and hermiticity checks in tests/Grid_any_evenodd.cc; will work on inversion tests shortly. --- lib/qcd/action/Actions.h | 8 +- lib/qcd/action/fermion/CayleyFermion5D.cc | 110 ++++++++- lib/qcd/action/fermion/CayleyFermion5D.h | 6 +- lib/qcd/action/fermion/DomainWallFermion.h | 83 +------ lib/qcd/action/fermion/MobiusFermion.h | 46 ++++ .../action/fermion/MobiusZolotarevFermion.h | 48 ++++ lib/qcd/action/fermion/ScaledShamir.h | 51 ++++ lib/qcd/action/fermion/WilsonFermion5D.h | 12 + tests/Grid_any_evenodd.cc | 226 ++++++++++++++++++ tests/Makefile.am | 12 +- 10 files changed, 507 insertions(+), 95 deletions(-) create mode 100644 lib/qcd/action/fermion/MobiusFermion.h create mode 100644 lib/qcd/action/fermion/MobiusZolotarevFermion.h create mode 100644 lib/qcd/action/fermion/ScaledShamir.h create mode 100644 tests/Grid_any_evenodd.cc diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index acbf027c..d37d1cd4 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -38,6 +38,9 @@ //#include #include +#include +#include +#include //#include @@ -70,11 +73,6 @@ class LinearGaugeAction : public GaugeAction< multi1d, multi1d > typedef multi1d P; - typedef multi1d Q; - virtual void staple(LatticeColorMatrix& result, - const Handle< GaugeState >& state, - int mu, int cb) const = 0; */ - #endif diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 263cc28b..be528e79 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -15,7 +15,6 @@ namespace QCD { FourDimRedBlackGrid,_M5), mass(_mass) { - std::cout << "Constructing a CayleyFermion5D"<gamma[i]); //NB reciprocal relative to Chroma NEF code + bs[i] = 0.5*(bpc/omega[i] + bmc); + cs[i] = 0.5*(bpc/omega[i] - bmc); + } + + //////////////////////////////////////////////////////// + // Constants for the preconditioned matrix Cayley form + 
//////////////////////////////////////////////////////// + bee.resize(Ls); + cee.resize(Ls); + beo.resize(Ls); + ceo.resize(Ls); + + for(int i=0;i omega; std::vector bs; // S dependent coeffs @@ -53,6 +51,8 @@ namespace Grid { GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5); + protected: + void SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c); }; } diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h index 2abb6eb2..3e6a9739 100644 --- a/lib/qcd/action/fermion/DomainWallFermion.h +++ b/lib/qcd/action/fermion/DomainWallFermion.h @@ -28,86 +28,13 @@ namespace Grid { { RealD eps = 1.0; - zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham + Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham assert(zdata->n==this->Ls); + + std::cout << "DomainWallFermion with Ls="<CayleyFermion5D::SetCoefficients(1.0,zdata,1.0,0.0); - /////////////////////////////////////////////////////////// - // The Cayley coeffs (unprec) - /////////////////////////////////////////////////////////// - this->omega.resize(this->Ls); - this->bs.resize(this->Ls); - this->cs.resize(this->Ls); - this->as.resize(this->Ls); - - for(int i=0; i < this->Ls; i++){ - this->as[i] = 1.0; - this->omega[i] = ((double)zdata -> gamma[i]); - double bb=1.0; - this->bs[i] = 0.5*(bb/(this->omega[i]) + 1.0); - this->cs[i] = 0.5*(bb/(this->omega[i]) - 1.0); - } - - //////////////////////////////////////////////////////// - // Constants for the preconditioned matrix Cayley form - //////////////////////////////////////////////////////// - this->bee.resize(this->Ls); - this->cee.resize(this->Ls); - this->beo.resize(this->Ls); - this->ceo.resize(this->Ls); - - for(int i=0;iLs;i++){ - this->bee[i]=as[i]*(bs[i]*(4.0-M5) +1.0); - this->cee[i]=as[i]*(1.0-cs[i]*(4.0-M5)); - this->beo[i]=as[i]*bs[i]; - this->ceo[i]=-as[i]*cs[i]; - } - - aee.resize(this->Ls); - 
aeo.resize(this->Ls); - for(int i=0;iLs;i++){ - aee[i]=cee[i]; - aeo[i]=ceo[i]; - } - - ////////////////////////////////////////// - // LDU decomposition of eeoo - ////////////////////////////////////////// - dee.resize(this->Ls); - lee.resize(this->Ls); - leem.resize(this->Ls); - uee.resize(this->Ls); - ueem.resize(this->Ls); - - for(int i=0;iLs;i++){ - - dee[i] = bee[i]; - - if ( i < this->Ls-1 ) { - - lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column - - leem[i]=this->mass*cee[this->Ls-1]/bee[0]; - for(int j=0;jmass; - for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j]; - ueem[i]*= aee[0]/bee[0]; - - } else { - lee[i] =0.0; - leem[i]=0.0; - uee[i] =0.0; - ueem[i]=0.0; - } - } - - { - double delta_d=mass*cee[this->Ls-1]; - for(int j=0;jLs-1;j++) delta_d *= cee[j]/bee[j]; - dee[this->Ls-1] += delta_d; - } } }; diff --git a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h new file mode 100644 index 00000000..4c291fad --- /dev/null +++ b/lib/qcd/action/fermion/MobiusFermion.h @@ -0,0 +1,46 @@ +#ifndef GRID_QCD_MOBIUS_FERMION_H +#define GRID_QCD_MOBIUS_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class MobiusFermion : public CayleyFermion5D + { + public: + + // Constructors + MobiusFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD b, RealD c) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5) + + { + RealD eps = 1.0; + + std::cout << "MobiusFermion (b="<Ls);// eps is ignored for higham + assert(zdata->n==this->Ls); + + // Call base setter + this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c); + + } + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h new file mode 100644 index 00000000..866d0c39 
--- /dev/null +++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h @@ -0,0 +1,48 @@ +#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H +#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class MobiusZolotarevFermion : public CayleyFermion5D + { + public: + + // Constructors + MobiusZolotarevFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD b, RealD c, + RealD lo, RealD hi) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5) + + { + RealD eps = lo/hi; + + Approx::zolotarev_data *zdata = Approx::grid_zolotarev(eps,this->Ls,0);// eps is ignored for higham + assert(zdata->n==this->Ls); + + std::cout << "MobiusZolotarevFermion (b="<CayleyFermion5D::SetCoefficients(1.0,zdata,b,c); + + } + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/ScaledShamir.h b/lib/qcd/action/fermion/ScaledShamir.h new file mode 100644 index 00000000..a1fd33d0 --- /dev/null +++ b/lib/qcd/action/fermion/ScaledShamir.h @@ -0,0 +1,51 @@ +#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H +#define GRID_QCD_DOMAIN_WALL_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class ScaledShamirFermion : public CayleyFermion5D + { + public: + + // Constructors + ScaledShamirFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, RealD scale) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5, + RealD b, + RealD c) + + { + RealD eps = 1.0; + + Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham + assert(zdata->n==this->Ls); + + //b+c = scale; + //b-c = 1 + //b = 
0.5(scale+1); + //c = 0.5(scale-1); + + // Call base setter + this->CayleyFermion5D::SetCoefficients(1.0,zdata,0.5*(scale+1.0),0.5*(scale-1.0)); + + } + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index d4777d01..062c4d82 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -14,6 +14,18 @@ namespace Grid { // i.e. even even contains fifth dim hopping term. // // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] + //////////////////////////// + //ContFrac: + // Ls always odd. Rational poly deg is either Ls or Ls-1 + //PartFrac + // Ls always odd. Rational poly deg is either Ls or Ls-1 + // + //Cayley: Ls always even, Rational poly deg is Ls + // + // Just set nrational as Ls. Forget about Ls-1 cases. + // + // Require odd Ls for cont and part frac + //////////////////////////// //////////////////////////////////////////////////////////////////////////////// class WilsonFermion5D : public FermionOperator { diff --git a/tests/Grid_any_evenodd.cc b/tests/Grid_any_evenodd.cc new file mode 100644 index 00000000..8d8580c1 --- /dev/null +++ b/tests/Grid_any_evenodd.cc @@ -0,0 +1,226 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + +template +void TestWhat(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, GridParallelRNG *RNG5); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout << "Grid is setup to use "< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG 
RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + std::vector U(4,UGrid); + + RealD mass=0.1; + RealD M5 =1.8; + DomainWallFermion Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); + TestWhat(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + RealD b=1.5;// Scale factor b+c=2, b-c=1 + RealD c=0.5; + MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); + TestWhat(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,4.0); + TestWhat(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + + Grid_finalize(); +} + +template +void TestWhat(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion phi (FGrid); random(*RNG5,phi); + LatticeFermion chi (FGrid); random(*RNG5,chi); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); tmp=zero; + LatticeFermion err(FGrid); tmp=zero; + + LatticeFermion src_e (FrbGrid); + LatticeFermion src_o (FrbGrid); + LatticeFermion r_e (FrbGrid); + LatticeFermion r_o (FrbGrid); + LatticeFermion r_eo (FGrid); + LatticeFermion r_eeoo(FGrid); + + std::cout<<"=========================================================="< * = < chi | Deo^dag| phi> "< $f.log - done Grid_main_SOURCES = Grid_main.cc Grid_main_LDADD = -lGrid @@ -66,6 +63,9 @@ Grid_simd_LDADD = -lGrid Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc Grid_wilson_evenodd_LDADD = -lGrid +Grid_any_evenodd_SOURCES = Grid_any_evenodd.cc +Grid_any_evenodd_LDADD = -lGrid + Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc Grid_wilson_cg_unprec_LDADD = -lGrid From c659c76053256c87a94bbc17e7a169b3aefc7f6c Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: 
Wed, 3 Jun 2015 09:51:06 +0100 Subject: [PATCH 06/22] Scaled Shamir and Scaled Shamir Zolotarev aliases for special cases of Mobius. --- lib/qcd/action/Actions.h | 3 +- lib/qcd/action/fermion/ScaledShamir.h | 51 ------------------- lib/qcd/action/fermion/ScaledShamirFermion.h | 37 ++++++++++++++ .../fermion/ScaledShamirZolotarevFermion.h | 39 ++++++++++++++ tests/Grid_any_evenodd.cc | 8 ++- 5 files changed, 85 insertions(+), 53 deletions(-) delete mode 100644 lib/qcd/action/fermion/ScaledShamir.h create mode 100644 lib/qcd/action/fermion/ScaledShamirFermion.h create mode 100644 lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index d37d1cd4..893564e0 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -41,7 +41,8 @@ #include #include #include -//#include +#include +#include // Chroma interface defining FermionAction diff --git a/lib/qcd/action/fermion/ScaledShamir.h b/lib/qcd/action/fermion/ScaledShamir.h deleted file mode 100644 index a1fd33d0..00000000 --- a/lib/qcd/action/fermion/ScaledShamir.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H -#define GRID_QCD_DOMAIN_WALL_FERMION_H - -#include - -namespace Grid { - - namespace QCD { - - class ScaledShamirFermion : public CayleyFermion5D - { - public: - - // Constructors - ScaledShamirFermion(LatticeGaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, RealD scale) : - - CayleyFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5, - RealD b, - RealD c) - - { - RealD eps = 1.0; - - Approx::zolotarev_data *zdata = Approx::grid_higham(eps,this->Ls);// eps is ignored for higham - assert(zdata->n==this->Ls); - - //b+c = scale; - //b-c = 1 - //b = 0.5(scale+1); - //c = 0.5(scale-1); - - // Call base setter - 
this->CayleyFermion5D::SetCoefficients(1.0,zdata,0.5*(scale+1.0),0.5*(scale-1.0)); - - } - - }; - - } -} - -#endif diff --git a/lib/qcd/action/fermion/ScaledShamirFermion.h b/lib/qcd/action/fermion/ScaledShamirFermion.h new file mode 100644 index 00000000..59fb16a8 --- /dev/null +++ b/lib/qcd/action/fermion/ScaledShamirFermion.h @@ -0,0 +1,37 @@ +#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H +#define GRID_QCD_SCALED_SHAMIR_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class ScaledShamirFermion : public MobiusFermion + { + public: + + // Constructors + ScaledShamirFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale) : + + // b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1 + MobiusFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0)) + { + } + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h b/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h new file mode 100644 index 00000000..a4c88d5d --- /dev/null +++ b/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h @@ -0,0 +1,39 @@ +#ifndef GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H +#define GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class ScaledShamirZolotarevFermion : public MobiusZolotarevFermion + { + public: + + // Constructors + + + ScaledShamirZolotarevFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale, + RealD lo, RealD hi) : + + MobiusZolotarevFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),lo,hi) + 
+ {} + + }; + + } +} + +#endif diff --git a/tests/Grid_any_evenodd.cc b/tests/Grid_any_evenodd.cc index 8d8580c1..de1e3c78 100644 --- a/tests/Grid_any_evenodd.cc +++ b/tests/Grid_any_evenodd.cc @@ -56,9 +56,15 @@ int main (int argc, char ** argv) MobiusFermion Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); TestWhat(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,4.0); + MobiusZolotarevFermion Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0); TestWhat(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0); + TestWhat(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + ScaledShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0,0.1,2.0); + TestWhat(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + Grid_finalize(); } From 2b083ca987e8c4cccaa9a042c25e59b515080901 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 10:54:03 +0100 Subject: [PATCH 07/22] CG test written and passes i.e. converges with small true residual in RedBlack MpcDagMpc, Unprec MdagM and Schur red black solver for each of. 
DomainWallFermion MobiusFermion MobiusZolotarevFermion ScaledShamirFermion ScaledShamirZolotarevFermion --- .../{Grid_comms.cc => Benchmark_comms.cc} | 0 benchmarks/{Grid_dwf.cc => Benchmark_dwf.cc} | 0 ...width.cc => Benchmark_memory_bandwidth.cc} | 0 benchmarks/{Grid_su3.cc => Benchmark_su3.cc} | 0 ...Grid_su3_expr.cc => Benchmark_su3_expr.cc} | 0 ...Grid_su3_test.cc => Benchmark_su3_test.cc} | 0 .../{Grid_wilson.cc => Benchmark_wilson.cc} | 0 benchmarks/Makefile.am | 30 ++-- tests/Makefile.am | 121 ++++++------- tests/{Grid_cshift.cc => Test_cshift.cc} | 0 ..._red_black.cc => Test_cshift_red_black.cc} | 0 ...rid_dwf_cg_prec.cc => Test_dwf_cg_prec.cc} | 0 ...d_dwf_cg_schur.cc => Test_dwf_cg_schur.cc} | 0 ...dwf_cg_unprec.cc => Test_dwf_cg_unprec.cc} | 0 ...d_dwf_even_odd.cc => Test_dwf_even_odd.cc} | 0 tests/{Grid_gamma.cc => Test_gamma.cc} | 0 tests/{Grid_main.cc => Test_main.cc} | 0 tests/Test_many_cg.cc | 164 ++++++++++++++++++ ...id_any_evenodd.cc => Test_many_evenodd.cc} | 0 tests/{Grid_nersc_io.cc => Test_nersc_io.cc} | 0 tests/{Grid_remez.cc => Test_remez.cc} | 0 tests/{Grid_rng.cc => Test_rng.cc} | 0 .../{Grid_rng_fixed.cc => Test_rng_fixed.cc} | 0 tests/{Grid_simd.cc => Test_simd.cc} | 0 tests/{Grid_simd_new.cc => Test_simd_new.cc} | 0 tests/{Grid_stencil.cc => Test_stencil.cc} | 0 ...lson_cg_prec.cc => Test_wilson_cg_prec.cc} | 0 ...on_cg_schur.cc => Test_wilson_cg_schur.cc} | 0 ..._cg_unprec.cc => Test_wilson_cg_unprec.cc} | 0 ...lson_evenodd.cc => Test_wilson_evenodd.cc} | 0 30 files changed, 241 insertions(+), 74 deletions(-) rename benchmarks/{Grid_comms.cc => Benchmark_comms.cc} (100%) rename benchmarks/{Grid_dwf.cc => Benchmark_dwf.cc} (100%) rename benchmarks/{Grid_memory_bandwidth.cc => Benchmark_memory_bandwidth.cc} (100%) rename benchmarks/{Grid_su3.cc => Benchmark_su3.cc} (100%) rename benchmarks/{Grid_su3_expr.cc => Benchmark_su3_expr.cc} (100%) rename benchmarks/{Grid_su3_test.cc => Benchmark_su3_test.cc} (100%) rename 
benchmarks/{Grid_wilson.cc => Benchmark_wilson.cc} (100%) rename tests/{Grid_cshift.cc => Test_cshift.cc} (100%) rename tests/{Grid_cshift_red_black.cc => Test_cshift_red_black.cc} (100%) rename tests/{Grid_dwf_cg_prec.cc => Test_dwf_cg_prec.cc} (100%) rename tests/{Grid_dwf_cg_schur.cc => Test_dwf_cg_schur.cc} (100%) rename tests/{Grid_dwf_cg_unprec.cc => Test_dwf_cg_unprec.cc} (100%) rename tests/{Grid_dwf_even_odd.cc => Test_dwf_even_odd.cc} (100%) rename tests/{Grid_gamma.cc => Test_gamma.cc} (100%) rename tests/{Grid_main.cc => Test_main.cc} (100%) create mode 100644 tests/Test_many_cg.cc rename tests/{Grid_any_evenodd.cc => Test_many_evenodd.cc} (100%) rename tests/{Grid_nersc_io.cc => Test_nersc_io.cc} (100%) rename tests/{Grid_remez.cc => Test_remez.cc} (100%) rename tests/{Grid_rng.cc => Test_rng.cc} (100%) rename tests/{Grid_rng_fixed.cc => Test_rng_fixed.cc} (100%) rename tests/{Grid_simd.cc => Test_simd.cc} (100%) rename tests/{Grid_simd_new.cc => Test_simd_new.cc} (100%) rename tests/{Grid_stencil.cc => Test_stencil.cc} (100%) rename tests/{Grid_wilson_cg_prec.cc => Test_wilson_cg_prec.cc} (100%) rename tests/{Grid_wilson_cg_schur.cc => Test_wilson_cg_schur.cc} (100%) rename tests/{Grid_wilson_cg_unprec.cc => Test_wilson_cg_unprec.cc} (100%) rename tests/{Grid_wilson_evenodd.cc => Test_wilson_evenodd.cc} (100%) diff --git a/benchmarks/Grid_comms.cc b/benchmarks/Benchmark_comms.cc similarity index 100% rename from benchmarks/Grid_comms.cc rename to benchmarks/Benchmark_comms.cc diff --git a/benchmarks/Grid_dwf.cc b/benchmarks/Benchmark_dwf.cc similarity index 100% rename from benchmarks/Grid_dwf.cc rename to benchmarks/Benchmark_dwf.cc diff --git a/benchmarks/Grid_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc similarity index 100% rename from benchmarks/Grid_memory_bandwidth.cc rename to benchmarks/Benchmark_memory_bandwidth.cc diff --git a/benchmarks/Grid_su3.cc b/benchmarks/Benchmark_su3.cc similarity index 100% rename from 
benchmarks/Grid_su3.cc rename to benchmarks/Benchmark_su3.cc diff --git a/benchmarks/Grid_su3_expr.cc b/benchmarks/Benchmark_su3_expr.cc similarity index 100% rename from benchmarks/Grid_su3_expr.cc rename to benchmarks/Benchmark_su3_expr.cc diff --git a/benchmarks/Grid_su3_test.cc b/benchmarks/Benchmark_su3_test.cc similarity index 100% rename from benchmarks/Grid_su3_test.cc rename to benchmarks/Benchmark_su3_test.cc diff --git a/benchmarks/Grid_wilson.cc b/benchmarks/Benchmark_wilson.cc similarity index 100% rename from benchmarks/Grid_wilson.cc rename to benchmarks/Benchmark_wilson.cc diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index e1e092f4..74ff03c6 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -6,24 +6,24 @@ AM_LDFLAGS = -L$(top_builddir)/lib # Test code # bin_PROGRAMS = \ - Grid_comms \ - Grid_memory_bandwidth \ - Grid_su3 \ - Grid_wilson \ - Grid_dwf + Benchmark_comms \ + Benchmark_memory_bandwidth \ + Benchmark_su3 \ + Benchmark_wilson \ + Benchmark_dwf -Grid_comms_SOURCES = Grid_comms.cc -Grid_comms_LDADD = -lGrid +Benchmark_comms_SOURCES = Benchmark_comms.cc +Benchmark_comms_LDADD = -lGrid -Grid_memory_bandwidth_SOURCES = Grid_memory_bandwidth.cc -Grid_memory_bandwidth_LDADD = -lGrid +Benchmark_memory_bandwidth_SOURCES = Benchmark_memory_bandwidth.cc +Benchmark_memory_bandwidth_LDADD = -lGrid -Grid_su3_SOURCES = Grid_su3.cc Grid_su3_test.cc Grid_su3_expr.cc -Grid_su3_LDADD = -lGrid +Benchmark_su3_SOURCES = Benchmark_su3.cc Benchmark_su3_test.cc Benchmark_su3_expr.cc +Benchmark_su3_LDADD = -lGrid -Grid_wilson_SOURCES = Grid_wilson.cc -Grid_wilson_LDADD = -lGrid +Benchmark_wilson_SOURCES = Benchmark_wilson.cc +Benchmark_wilson_LDADD = -lGrid -Grid_dwf_SOURCES = Grid_dwf.cc -Grid_dwf_LDADD = -lGrid +Benchmark_dwf_SOURCES = Benchmark_dwf.cc +Benchmark_dwf_LDADD = -lGrid diff --git a/tests/Makefile.am b/tests/Makefile.am index f8f5df29..bb3448f1 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -5,85 +5,88 @@ 
AM_LDFLAGS = -L$(top_builddir)/lib # # Test code # -bin_PROGRAMS = Grid_main \ - Grid_simd \ - Grid_gamma \ - Grid_cshift \ - Grid_cshift_red_black \ - Grid_stencil \ - Grid_nersc_io \ - Grid_rng \ - Grid_remez \ - Grid_rng_fixed \ - Grid_wilson_evenodd \ - Grid_wilson_cg_unprec \ - Grid_wilson_cg_prec \ - Grid_wilson_cg_schur \ - Grid_dwf_even_odd\ - Grid_dwf_cg_unprec\ - Grid_dwf_cg_prec\ - Grid_dwf_cg_schur\ - Grid_any_evenodd +bin_PROGRAMS = Test_main \ + Test_simd \ + Test_gamma \ + Test_cshift \ + Test_cshift_red_black \ + Test_stencil \ + Test_nersc_io \ + Test_rng \ + Test_remez \ + Test_rng_fixed \ + Test_wilson_evenodd \ + Test_wilson_cg_unprec \ + Test_wilson_cg_prec \ + Test_wilson_cg_schur \ + Test_dwf_even_odd\ + Test_dwf_cg_unprec\ + Test_dwf_cg_prec\ + Test_dwf_cg_schur\ + Test_many_evenodd\ + Test_many_cg -Grid_main_SOURCES = Grid_main.cc -Grid_main_LDADD = -lGrid +Test_main_SOURCES = Test_main.cc +Test_main_LDADD = -lGrid -Grid_rng_SOURCES = Grid_rng.cc -Grid_rng_LDADD = -lGrid +Test_rng_SOURCES = Test_rng.cc +Test_rng_LDADD = -lGrid -Grid_rng_fixed_SOURCES = Grid_rng_fixed.cc -Grid_rng_fixed_LDADD = -lGrid +Test_rng_fixed_SOURCES = Test_rng_fixed.cc +Test_rng_fixed_LDADD = -lGrid -Grid_remez_SOURCES = Grid_remez.cc -Grid_remez_LDADD = -lGrid +Test_remez_SOURCES = Test_remez.cc +Test_remez_LDADD = -lGrid -Grid_nersc_io_SOURCES = Grid_nersc_io.cc -Grid_nersc_io_LDADD = -lGrid +Test_nersc_io_SOURCES = Test_nersc_io.cc +Test_nersc_io_LDADD = -lGrid -Grid_cshift_SOURCES = Grid_cshift.cc -Grid_cshift_LDADD = -lGrid +Test_cshift_SOURCES = Test_cshift.cc +Test_cshift_LDADD = -lGrid -Grid_cshift_red_black_SOURCES = Grid_cshift_red_black.cc -Grid_cshift_red_black_LDADD = -lGrid +Test_cshift_red_black_SOURCES = Test_cshift_red_black.cc +Test_cshift_red_black_LDADD = -lGrid -Grid_gamma_SOURCES = Grid_gamma.cc -Grid_gamma_LDADD = -lGrid +Test_gamma_SOURCES = Test_gamma.cc +Test_gamma_LDADD = -lGrid -Grid_stencil_SOURCES = Grid_stencil.cc -Grid_stencil_LDADD = 
-lGrid +Test_stencil_SOURCES = Test_stencil.cc +Test_stencil_LDADD = -lGrid -Grid_simd_SOURCES = Grid_simd.cc -Grid_simd_LDADD = -lGrid +Test_simd_SOURCES = Test_simd.cc +Test_simd_LDADD = -lGrid -#Grid_simd_new_SOURCES = Grid_simd_new.cc -#Grid_simd_new_LDADD = -lGrid +#Test_simd_new_SOURCES = Test_simd_new.cc +#Test_simd_new_LDADD = -lGrid -Grid_wilson_evenodd_SOURCES = Grid_wilson_evenodd.cc -Grid_wilson_evenodd_LDADD = -lGrid +Test_wilson_evenodd_SOURCES = Test_wilson_evenodd.cc +Test_wilson_evenodd_LDADD = -lGrid -Grid_any_evenodd_SOURCES = Grid_any_evenodd.cc -Grid_any_evenodd_LDADD = -lGrid +Test_wilson_cg_unprec_SOURCES = Test_wilson_cg_unprec.cc +Test_wilson_cg_unprec_LDADD = -lGrid -Grid_wilson_cg_unprec_SOURCES = Grid_wilson_cg_unprec.cc -Grid_wilson_cg_unprec_LDADD = -lGrid +Test_wilson_cg_prec_SOURCES = Test_wilson_cg_prec.cc +Test_wilson_cg_prec_LDADD = -lGrid -Grid_wilson_cg_prec_SOURCES = Grid_wilson_cg_prec.cc -Grid_wilson_cg_prec_LDADD = -lGrid +Test_wilson_cg_schur_SOURCES = Test_wilson_cg_schur.cc +Test_wilson_cg_schur_LDADD = -lGrid -Grid_wilson_cg_schur_SOURCES = Grid_wilson_cg_schur.cc -Grid_wilson_cg_schur_LDADD = -lGrid +Test_dwf_even_odd_SOURCES = Test_dwf_even_odd.cc +Test_dwf_even_odd_LDADD = -lGrid -Grid_dwf_even_odd_SOURCES = Grid_dwf_even_odd.cc -Grid_dwf_even_odd_LDADD = -lGrid +Test_dwf_cg_unprec_SOURCES = Test_dwf_cg_unprec.cc +Test_dwf_cg_unprec_LDADD = -lGrid -Grid_dwf_cg_unprec_SOURCES = Grid_dwf_cg_unprec.cc -Grid_dwf_cg_unprec_LDADD = -lGrid +Test_dwf_cg_prec_SOURCES = Test_dwf_cg_prec.cc +Test_dwf_cg_prec_LDADD = -lGrid -Grid_dwf_cg_prec_SOURCES = Grid_dwf_cg_prec.cc -Grid_dwf_cg_prec_LDADD = -lGrid +Test_dwf_cg_schur_SOURCES = Test_dwf_cg_schur.cc +Test_dwf_cg_schur_LDADD = -lGrid -Grid_dwf_cg_schur_SOURCES = Grid_dwf_cg_schur.cc -Grid_dwf_cg_schur_LDADD = -lGrid +Test_many_evenodd_SOURCES = Test_many_evenodd.cc +Test_many_evenodd_LDADD = -lGrid +Test_many_cg_SOURCES = Test_many_cg.cc +Test_many_cg_LDADD = -lGrid diff --git 
a/tests/Grid_cshift.cc b/tests/Test_cshift.cc similarity index 100% rename from tests/Grid_cshift.cc rename to tests/Test_cshift.cc diff --git a/tests/Grid_cshift_red_black.cc b/tests/Test_cshift_red_black.cc similarity index 100% rename from tests/Grid_cshift_red_black.cc rename to tests/Test_cshift_red_black.cc diff --git a/tests/Grid_dwf_cg_prec.cc b/tests/Test_dwf_cg_prec.cc similarity index 100% rename from tests/Grid_dwf_cg_prec.cc rename to tests/Test_dwf_cg_prec.cc diff --git a/tests/Grid_dwf_cg_schur.cc b/tests/Test_dwf_cg_schur.cc similarity index 100% rename from tests/Grid_dwf_cg_schur.cc rename to tests/Test_dwf_cg_schur.cc diff --git a/tests/Grid_dwf_cg_unprec.cc b/tests/Test_dwf_cg_unprec.cc similarity index 100% rename from tests/Grid_dwf_cg_unprec.cc rename to tests/Test_dwf_cg_unprec.cc diff --git a/tests/Grid_dwf_even_odd.cc b/tests/Test_dwf_even_odd.cc similarity index 100% rename from tests/Grid_dwf_even_odd.cc rename to tests/Test_dwf_even_odd.cc diff --git a/tests/Grid_gamma.cc b/tests/Test_gamma.cc similarity index 100% rename from tests/Grid_gamma.cc rename to tests/Test_gamma.cc diff --git a/tests/Grid_main.cc b/tests/Test_main.cc similarity index 100% rename from tests/Grid_main.cc rename to tests/Test_main.cc diff --git a/tests/Test_many_cg.cc b/tests/Test_many_cg.cc new file mode 100644 index 00000000..3241a025 --- /dev/null +++ b/tests/Test_many_cg.cc @@ -0,0 +1,164 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + +template +void TestCGinversions(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); +template +void TestCGschur(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + 
GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + +template +void TestCGunprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + +template +void TestCGprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout << "Grid is setup to use "< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + std::vector U(4,UGrid); + + RealD mass=0.1; + RealD M5 =1.8; + std::cout <<"DomainWallFermion test"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + RealD b=1.5;// Scale factor b+c=2, b-c=1 + RealD c=0.5; + std::cout <<"MobiusFermion test"<(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"MobiusZolotarevFermion test"<(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"ScaledShamirFermion test"<(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"ScaledShamirZolotarevFermion test"<(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + Grid_finalize(); +} +template +void TestCGinversions(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + std::cout << "Testing unpreconditioned inverter"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); + std::cout << "Testing red 
black preconditioned inverter"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); + std::cout << "Testing red black Schur inverter"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); +} + +template +void TestCGunprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion result(FGrid); result=zero; + + HermitianOperator HermOp(Ddwf); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOp,src,result); + +} +template +void TestCGprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion src_o(FrbGrid); + LatticeFermion result_o(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o=zero; + + HermitianCheckerBoardedOperator HermOpEO(Ddwf); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOpEO,src_o,result_o); +} + + +template +void TestCGschur(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion result(FGrid); result=zero; + + ConjugateGradient CG(1.0e-8,10000); + SchurRedBlackSolve SchurSolver(CG); + SchurSolver(Ddwf,src,result); +} diff --git a/tests/Grid_any_evenodd.cc b/tests/Test_many_evenodd.cc similarity index 100% rename from tests/Grid_any_evenodd.cc rename to tests/Test_many_evenodd.cc diff --git a/tests/Grid_nersc_io.cc b/tests/Test_nersc_io.cc similarity index 100% rename from tests/Grid_nersc_io.cc rename to tests/Test_nersc_io.cc diff --git a/tests/Grid_remez.cc b/tests/Test_remez.cc similarity index 100% rename 
from tests/Grid_remez.cc rename to tests/Test_remez.cc diff --git a/tests/Grid_rng.cc b/tests/Test_rng.cc similarity index 100% rename from tests/Grid_rng.cc rename to tests/Test_rng.cc diff --git a/tests/Grid_rng_fixed.cc b/tests/Test_rng_fixed.cc similarity index 100% rename from tests/Grid_rng_fixed.cc rename to tests/Test_rng_fixed.cc diff --git a/tests/Grid_simd.cc b/tests/Test_simd.cc similarity index 100% rename from tests/Grid_simd.cc rename to tests/Test_simd.cc diff --git a/tests/Grid_simd_new.cc b/tests/Test_simd_new.cc similarity index 100% rename from tests/Grid_simd_new.cc rename to tests/Test_simd_new.cc diff --git a/tests/Grid_stencil.cc b/tests/Test_stencil.cc similarity index 100% rename from tests/Grid_stencil.cc rename to tests/Test_stencil.cc diff --git a/tests/Grid_wilson_cg_prec.cc b/tests/Test_wilson_cg_prec.cc similarity index 100% rename from tests/Grid_wilson_cg_prec.cc rename to tests/Test_wilson_cg_prec.cc diff --git a/tests/Grid_wilson_cg_schur.cc b/tests/Test_wilson_cg_schur.cc similarity index 100% rename from tests/Grid_wilson_cg_schur.cc rename to tests/Test_wilson_cg_schur.cc diff --git a/tests/Grid_wilson_cg_unprec.cc b/tests/Test_wilson_cg_unprec.cc similarity index 100% rename from tests/Grid_wilson_cg_unprec.cc rename to tests/Test_wilson_cg_unprec.cc diff --git a/tests/Grid_wilson_evenodd.cc b/tests/Test_wilson_evenodd.cc similarity index 100% rename from tests/Grid_wilson_evenodd.cc rename to tests/Test_wilson_evenodd.cc From 6cb38dc5dcfcec88183230f7b597a5ece7531750 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 11:26:54 +0100 Subject: [PATCH 08/22] Overlap Wilson Cayley tanh & zolo --- lib/qcd/action/Actions.h | 27 ++++++++++--- .../action/fermion/MobiusZolotarevFermion.h | 1 + .../fermion/OverlapWilsonCayleyTanhFermion.h | 34 ++++++++++++++++ .../OverlapWilsonCayleyZolotarevFermion.h | 37 ++++++++++++++++++ .../action/fermion/PartialFractionFermion5D.h | 4 +- .../fermion/ScaledShamirZolotarevFermion.h | 
39 ------------------- .../action/fermion/ShamirZolotarevFermion.h | 39 +++++++++++++++++++ tests/Test_many_cg.cc | 14 +++++-- tests/Test_many_evenodd.cc | 17 +++++++- 9 files changed, 161 insertions(+), 51 deletions(-) create mode 100644 lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h create mode 100644 lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h delete mode 100644 lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h create mode 100644 lib/qcd/action/fermion/ShamirZolotarevFermion.h diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index 893564e0..8a8c4642 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -30,19 +30,36 @@ //#include //////////////////////////////////////////// -// 5D formulations +// 5D formulations... //////////////////////////////////////////// + #include // used by all 5d overlap types + +////////// +// Cayley +////////// #include -#include -//#include #include #include + #include -#include #include -#include +#include + +#include +#include +#include + +////////////////////// +// Continued fraction +////////////////////// +#include + +////////////////////// +// Partial fraction +////////////////////// +#include // Chroma interface defining FermionAction diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h index 866d0c39..9ac795d9 100644 --- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h +++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h @@ -34,6 +34,7 @@ namespace Grid { assert(zdata->n==this->Ls); std::cout << "MobiusZolotarevFermion (b="<CayleyFermion5D::SetCoefficients(1.0,zdata,b,c); diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h new file mode 100644 index 00000000..e764c8ae --- /dev/null +++ b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h @@ -0,0 +1,34 @@ +#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H +#define 
OVERLAP_WILSON_CAYLEY_TANH_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class OverlapWilsonCayleyTanhFermion : public MobiusFermion + { + public: + + // Constructors + OverlapWilsonCayleyTanhFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale) : + + // b+c=scale, b-c = 0 <=> b =c = scale/2 + MobiusFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale) + { + } + }; + } +} +#endif diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h new file mode 100644 index 00000000..82c43fb7 --- /dev/null +++ b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h @@ -0,0 +1,37 @@ +#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H +#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion + { + public: + + // Constructors + + OverlapWilsonCayleyZolotarevFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo, RealD hi) : + // b+c=1.0, b-c = 0 <=> b =c = 1/2 + MobiusZolotarevFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi) + + {} + + }; + + } +} + +#endif diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.h b/lib/qcd/action/fermion/PartialFractionFermion5D.h index c51aa563..95f8c0f9 100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.h +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.h @@ -25,10 +25,10 @@ namespace Grid { virtual void PartialFractionCoefficients(void); - 
zolotarev_data *zdata; + Approx::zolotarev_data *zdata; // Part frac - double R=(1+this->mass)/(1-this->mass); + double R; std::vector p; std::vector q; diff --git a/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h b/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h deleted file mode 100644 index a4c88d5d..00000000 --- a/lib/qcd/action/fermion/ScaledShamirZolotarevFermion.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H -#define GRID_QCD_SCALED_SHAMIR_ZOLOTAREV_FERMION_H - -#include - -namespace Grid { - - namespace QCD { - - class ScaledShamirZolotarevFermion : public MobiusZolotarevFermion - { - public: - - // Constructors - - - ScaledShamirZolotarevFermion(LatticeGaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD scale, - RealD lo, RealD hi) : - - MobiusZolotarevFermion(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),lo,hi) - - {} - - }; - - } -} - -#endif diff --git a/lib/qcd/action/fermion/ShamirZolotarevFermion.h b/lib/qcd/action/fermion/ShamirZolotarevFermion.h new file mode 100644 index 00000000..6a7df439 --- /dev/null +++ b/lib/qcd/action/fermion/ShamirZolotarevFermion.h @@ -0,0 +1,39 @@ +#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H +#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class ShamirZolotarevFermion : public MobiusZolotarevFermion + { + public: + + // Constructors + + + ShamirZolotarevFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo, RealD hi) : + + // b+c = 1; b-c = 1 => b=1, c=0 + MobiusZolotarevFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + 
FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi) + + {} + + }; + + } +} + +#endif diff --git a/tests/Test_many_cg.cc b/tests/Test_many_cg.cc index 3241a025..4510d4b5 100644 --- a/tests/Test_many_cg.cc +++ b/tests/Test_many_cg.cc @@ -89,9 +89,17 @@ int main (int argc, char ** argv) ScaledShamirFermion Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0); TestCGinversions(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - std::cout <<"ScaledShamirZolotarevFermion test"<(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout <<"ShamirZolotarevFermion test"<(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"OverlapWilsonCayleyTanhFermion test"<(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"OverlapWilsonCayleyZolotarevFermion test"<(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } diff --git a/tests/Test_many_evenodd.cc b/tests/Test_many_evenodd.cc index de1e3c78..df28981b 100644 --- a/tests/Test_many_evenodd.cc +++ b/tests/Test_many_evenodd.cc @@ -48,23 +48,36 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5 =1.8; + std::cout <<"DomainWallFermion test"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); RealD b=1.5;// Scale factor b+c=2, b-c=1 RealD c=0.5; + std::cout <<"MobiusFermion test"<(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout <<"MobiusZolotarevFermion test"<(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout <<"ScaledShamirFermion test"<(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); - ScaledShamirZolotarevFermion Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0,0.1,2.0); - TestWhat(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + std::cout <<"ShamirZolotarevFermion test"<(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"OverlapWilsonCayleyTanhFermion test"<(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout 
<<"OverlapWilsonCayleyZolotarevFermion test"<(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } From f9b070d64d7c306d8eb6c1af4ea2da74c5c2caf2 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 12:47:05 +0100 Subject: [PATCH 09/22] Reorganise of file naming --- Makefile.am | 2 + configure | 4 +- configure.ac | 2 +- lib/{Grid_algorithms.h => Algorithms.h} | 0 ...aligned_allocator.h => AlignedAllocator.h} | 0 lib/Cartesian.h | 8 ++ lib/{Grid_communicator.h => Communicator.h} | 2 +- lib/{Grid_comparison.h => Comparison.h} | 4 +- lib/{Grid_cshift.h => Cshift.h} | 6 +- lib/Grid.h | 26 ++-- lib/{Grid_config.h => GridConfig.h} | 0 lib/{Grid_config.h.in => GridConfig.h.in} | 2 +- lib/{Grid_init.cc => GridInit.cc} | 0 lib/Grid_cartesian.h | 8 -- lib/Grid_math.h | 16 --- lib/{Grid_lattice.h => Lattice.h} | 2 +- lib/Makefile.am | 113 ++---------------- lib/{Grid_simd.h => Simd.h} | 0 lib/{Grid_stencil.h => Stencil.h} | 2 +- lib/Tensors.h | 16 +++ lib/{Grid_threads.h => Threads.h} | 0 ...Grid_cartesian_base.h => Cartesian_base.h} | 1 - ...Grid_cartesian_full.h => Cartesian_full.h} | 0 ...sian_red_black.h => Cartesian_red_black.h} | 0 ...ommunicator_base.h => Communicator_base.h} | 0 ...ommunicator_mpi.cc => Communicator_mpi.cc} | 0 ...municator_none.cc => Communicator_none.cc} | 0 .../{Grid_cshift_common.h => Cshift_common.h} | 0 .../{Grid_cshift_mpi.h => Cshift_mpi.h} | 0 .../{Grid_cshift_none.h => Cshift_none.h} | 0 .../{Grid_lattice_ET.h => Lattice_ET.h} | 0 .../{Grid_lattice_arith.h => Lattice_arith.h} | 0 .../{Grid_lattice_base.h => Lattice_base.h} | 27 ++--- ...tice_comparison.h => Lattice_comparison.h} | 0 ...ce_conformable.h => Lattice_conformable.h} | 0 ...tice_coordinate.h => Lattice_coordinate.h} | 0 .../{Grid_lattice_local.h => Lattice_local.h} | 0 ..._lattice_overload.h => Lattice_overload.h} | 0 ..._lattice_peekpoke.h => Lattice_peekpoke.h} | 0 ...id_lattice_reality.h => Lattice_reality.h} | 0 ...attice_reduction.h => 
Lattice_reduction.h} | 0 .../{Grid_lattice_rng.h => Lattice_rng.h} | 0 .../{Grid_lattice_trace.h => Lattice_trace.h} | 0 ..._lattice_transfer.h => Lattice_transfer.h} | 0 ...attice_transpose.h => Lattice_transpose.h} | 0 .../{Grid_lattice_where.h => Lattice_where.h} | 0 lib/math/Grid_math_arith.h | 11 -- lib/parallelIO/{GridNerscIO.h => NerscIO.h} | 0 .../fermion/PartialFractionFermion5D.cc | 46 ------- lib/stencil/{Grid_lebesgue.cc => Lebesgue.cc} | 0 lib/stencil/{Grid_lebesgue.h => Lebesgue.h} | 0 ...id_stencil_common.cc => Stencil_common.cc} | 0 lib/tensors/Tensor_arith.h | 11 ++ .../Tensor_arith_add.h} | 0 .../Tensor_arith_mac.h} | 0 .../Tensor_arith_mul.h} | 0 .../Tensor_arith_scalar.h} | 0 .../Tensor_arith_sub.h} | 0 .../Tensor_class.h} | 0 .../Tensor_extract_merge.h} | 0 .../Tensor_inner.h} | 0 .../Tensor_outer.h} | 0 .../Tensor_peek.h} | 0 .../Tensor_poke.h} | 0 .../Tensor_reality.h} | 0 .../Tensor_trace.h} | 0 .../Tensor_traits.h} | 0 .../Tensor_transpose.h} | 0 tests/Test_cshift.cc | 1 - tests/Test_cshift_red_black.cc | 1 - tests/Test_gamma.cc | 1 - tests/Test_nersc_io.cc | 1 - tests/Test_rng.cc | 1 - tests/Test_rng_fixed.cc | 1 - tests/Test_simd.cc | 1 - 75 files changed, 85 insertions(+), 231 deletions(-) rename lib/{Grid_algorithms.h => Algorithms.h} (100%) rename lib/{Grid_aligned_allocator.h => AlignedAllocator.h} (100%) create mode 100644 lib/Cartesian.h rename lib/{Grid_communicator.h => Communicator.h} (57%) rename lib/{Grid_comparison.h => Comparison.h} (98%) rename lib/{Grid_cshift.h => Cshift.h} (51%) rename lib/{Grid_config.h => GridConfig.h} (100%) rename lib/{Grid_config.h.in => GridConfig.h.in} (97%) rename lib/{Grid_init.cc => GridInit.cc} (100%) delete mode 100644 lib/Grid_cartesian.h delete mode 100644 lib/Grid_math.h rename lib/{Grid_lattice.h => Lattice.h} (58%) rename lib/{Grid_simd.h => Simd.h} (100%) rename lib/{Grid_stencil.h => Stencil.h} (99%) create mode 100644 lib/Tensors.h rename lib/{Grid_threads.h => Threads.h} (100%) rename 
lib/cartesian/{Grid_cartesian_base.h => Cartesian_base.h} (99%) rename lib/cartesian/{Grid_cartesian_full.h => Cartesian_full.h} (100%) rename lib/cartesian/{Grid_cartesian_red_black.h => Cartesian_red_black.h} (100%) rename lib/communicator/{Grid_communicator_base.h => Communicator_base.h} (100%) rename lib/communicator/{Grid_communicator_mpi.cc => Communicator_mpi.cc} (100%) rename lib/communicator/{Grid_communicator_none.cc => Communicator_none.cc} (100%) rename lib/cshift/{Grid_cshift_common.h => Cshift_common.h} (100%) rename lib/cshift/{Grid_cshift_mpi.h => Cshift_mpi.h} (100%) rename lib/cshift/{Grid_cshift_none.h => Cshift_none.h} (100%) rename lib/lattice/{Grid_lattice_ET.h => Lattice_ET.h} (100%) rename lib/lattice/{Grid_lattice_arith.h => Lattice_arith.h} (100%) rename lib/lattice/{Grid_lattice_base.h => Lattice_base.h} (93%) rename lib/lattice/{Grid_lattice_comparison.h => Lattice_comparison.h} (100%) rename lib/lattice/{Grid_lattice_conformable.h => Lattice_conformable.h} (100%) rename lib/lattice/{Grid_lattice_coordinate.h => Lattice_coordinate.h} (100%) rename lib/lattice/{Grid_lattice_local.h => Lattice_local.h} (100%) rename lib/lattice/{Grid_lattice_overload.h => Lattice_overload.h} (100%) rename lib/lattice/{Grid_lattice_peekpoke.h => Lattice_peekpoke.h} (100%) rename lib/lattice/{Grid_lattice_reality.h => Lattice_reality.h} (100%) rename lib/lattice/{Grid_lattice_reduction.h => Lattice_reduction.h} (100%) rename lib/lattice/{Grid_lattice_rng.h => Lattice_rng.h} (100%) rename lib/lattice/{Grid_lattice_trace.h => Lattice_trace.h} (100%) rename lib/lattice/{Grid_lattice_transfer.h => Lattice_transfer.h} (100%) rename lib/lattice/{Grid_lattice_transpose.h => Lattice_transpose.h} (100%) rename lib/lattice/{Grid_lattice_where.h => Lattice_where.h} (100%) delete mode 100644 lib/math/Grid_math_arith.h rename lib/parallelIO/{GridNerscIO.h => NerscIO.h} (100%) rename lib/stencil/{Grid_lebesgue.cc => Lebesgue.cc} (100%) rename lib/stencil/{Grid_lebesgue.h 
=> Lebesgue.h} (100%) rename lib/stencil/{Grid_stencil_common.cc => Stencil_common.cc} (100%) create mode 100644 lib/tensors/Tensor_arith.h rename lib/{math/Grid_math_arith_add.h => tensors/Tensor_arith_add.h} (100%) rename lib/{math/Grid_math_arith_mac.h => tensors/Tensor_arith_mac.h} (100%) rename lib/{math/Grid_math_arith_mul.h => tensors/Tensor_arith_mul.h} (100%) rename lib/{math/Grid_math_arith_scalar.h => tensors/Tensor_arith_scalar.h} (100%) rename lib/{math/Grid_math_arith_sub.h => tensors/Tensor_arith_sub.h} (100%) rename lib/{math/Grid_math_tensors.h => tensors/Tensor_class.h} (100%) rename lib/{Grid_extract.h => tensors/Tensor_extract_merge.h} (100%) rename lib/{math/Grid_math_inner.h => tensors/Tensor_inner.h} (100%) rename lib/{math/Grid_math_outer.h => tensors/Tensor_outer.h} (100%) rename lib/{math/Grid_math_peek.h => tensors/Tensor_peek.h} (100%) rename lib/{math/Grid_math_poke.h => tensors/Tensor_poke.h} (100%) rename lib/{math/Grid_math_reality.h => tensors/Tensor_reality.h} (100%) rename lib/{math/Grid_math_trace.h => tensors/Tensor_trace.h} (100%) rename lib/{math/Grid_math_traits.h => tensors/Tensor_traits.h} (100%) rename lib/{math/Grid_math_transpose.h => tensors/Tensor_transpose.h} (100%) diff --git a/Makefile.am b/Makefile.am index fc3f6a0a..3b1d5690 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,3 +1,5 @@ # additional include paths necessary to compile the C++ library AM_CXXFLAGS = -I$(top_srcdir)/ SUBDIRS = lib tests benchmarks + +filelist: $(SUBDIRS) \ No newline at end of file diff --git a/configure b/configure index 615ba987..20328052 100755 --- a/configure +++ b/configure @@ -3064,7 +3064,7 @@ fi -ac_config_headers="$ac_config_headers lib/Grid_config.h" +ac_config_headers="$ac_config_headers lib/GridConfig.h" # Check whether --enable-silent-rules was given. 
if test "${enable_silent_rules+set}" = set; then : @@ -5814,7 +5814,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 for ac_config_target in $ac_config_targets do case $ac_config_target in - "lib/Grid_config.h") CONFIG_HEADERS="$CONFIG_HEADERS lib/Grid_config.h" ;; + "lib/GridConfig.h") CONFIG_HEADERS="$CONFIG_HEADERS lib/GridConfig.h" ;; "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "docs/doxy.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxy.cfg" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; diff --git a/configure.ac b/configure.ac index 5dcbea36..03fd7900 100644 --- a/configure.ac +++ b/configure.ac @@ -11,7 +11,7 @@ AC_CANONICAL_SYSTEM AM_INIT_AUTOMAKE(subdir-objects) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR([lib/Grid.h]) -AC_CONFIG_HEADERS([lib/Grid_config.h]) +AC_CONFIG_HEADERS([lib/GridConfig.h]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_MSG_NOTICE([ diff --git a/lib/Grid_algorithms.h b/lib/Algorithms.h similarity index 100% rename from lib/Grid_algorithms.h rename to lib/Algorithms.h diff --git a/lib/Grid_aligned_allocator.h b/lib/AlignedAllocator.h similarity index 100% rename from lib/Grid_aligned_allocator.h rename to lib/AlignedAllocator.h diff --git a/lib/Cartesian.h b/lib/Cartesian.h new file mode 100644 index 00000000..db384b53 --- /dev/null +++ b/lib/Cartesian.h @@ -0,0 +1,8 @@ +#ifndef GRID_CARTESIAN_H +#define GRID_CARTESIAN_H + +#include +#include +#include + +#endif diff --git a/lib/Grid_communicator.h b/lib/Communicator.h similarity index 57% rename from lib/Grid_communicator.h rename to lib/Communicator.h index cfa6e0a7..6880adda 100644 --- a/lib/Grid_communicator.h +++ b/lib/Communicator.h @@ -1,6 +1,6 @@ #ifndef GRID_COMMUNICATOR_H #define GRID_COMMUNICATOR_H -#include +#include #endif diff --git a/lib/Grid_comparison.h b/lib/Comparison.h similarity index 98% rename from lib/Grid_comparison.h rename to lib/Comparison.h index 3f9c206d..ecd6ece0 100644 --- a/lib/Grid_comparison.h +++ b/lib/Comparison.h 
@@ -141,7 +141,7 @@ namespace Grid { } } -#include -#include +#include +#include #endif diff --git a/lib/Grid_cshift.h b/lib/Cshift.h similarity index 51% rename from lib/Grid_cshift.h rename to lib/Cshift.h index 10c7a3c4..3caccbf9 100644 --- a/lib/Grid_cshift.h +++ b/lib/Cshift.h @@ -1,13 +1,13 @@ #ifndef _GRID_CSHIFT_H_ #define _GRID_CSHIFT_H_ -#include +#include #ifdef GRID_COMMS_NONE -#include +#include #endif #ifdef GRID_COMMS_MPI -#include +#include #endif #endif diff --git a/lib/Grid.h b/lib/Grid.h index 7fa56892..16530434 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -33,7 +33,7 @@ #define strong_inline __attribute__((always_inline)) inline -#include +#include //////////////////////////////////////////////////////////// // Tunable header includes @@ -46,22 +46,22 @@ #include #endif -#include +#include -#include -#include +#include +#include -#include // subdir aggregate -#include // subdir aggregate -#include // subdir aggregate -#include -#include // subdir aggregate -#include // subdir aggregate - -#include // subdir aggregate +#include // subdir aggregate +#include // subdir aggregate +#include // subdir aggregate +#include // subdir aggregate +#include +#include // subdir aggregate +#include // subdir aggregate +#include // subdir aggregate #include -#include +#include namespace Grid { diff --git a/lib/Grid_config.h b/lib/GridConfig.h similarity index 100% rename from lib/Grid_config.h rename to lib/GridConfig.h diff --git a/lib/Grid_config.h.in b/lib/GridConfig.h.in similarity index 97% rename from lib/Grid_config.h.in rename to lib/GridConfig.h.in index 6f05d6cb..03f91ff4 100644 --- a/lib/Grid_config.h.in +++ b/lib/GridConfig.h.in @@ -1,4 +1,4 @@ -/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */ +/* lib/GridConfig.h.in. Generated from configure.ac by autoheader. 
*/ /* AVX */ #undef AVX1 diff --git a/lib/Grid_init.cc b/lib/GridInit.cc similarity index 100% rename from lib/Grid_init.cc rename to lib/GridInit.cc diff --git a/lib/Grid_cartesian.h b/lib/Grid_cartesian.h deleted file mode 100644 index c01be20a..00000000 --- a/lib/Grid_cartesian.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef GRID_CARTESIAN_H -#define GRID_CARTESIAN_H - -#include -#include -#include - -#endif diff --git a/lib/Grid_math.h b/lib/Grid_math.h deleted file mode 100644 index 17bc09a5..00000000 --- a/lib/Grid_math.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef GRID_MATH_H -#define GRID_MATH_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#endif diff --git a/lib/Grid_lattice.h b/lib/Lattice.h similarity index 58% rename from lib/Grid_lattice.h rename to lib/Lattice.h index 35664aee..1f29a908 100644 --- a/lib/Grid_lattice.h +++ b/lib/Lattice.h @@ -1,6 +1,6 @@ #ifndef GRID_LATTICE_H #define GRID_LATTICE_H -#include +#include #endif diff --git a/lib/Makefile.am b/lib/Makefile.am index b688d8b2..7ee52e0e 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -3,119 +3,26 @@ AM_CXXFLAGS = -I$(top_srcdir)/ extra_sources= if BUILD_COMMS_MPI - extra_sources+=communicator/Grid_communicator_mpi.cc + extra_sources+=communicator/Communicator_mpi.cc endif if BUILD_COMMS_NONE - extra_sources+=communicator/Grid_communicator_none.cc + extra_sources+=communicator/Communicator_none.cc endif # # Libraries # -lib_LIBRARIES = libGrid.a -libGrid_a_SOURCES = \ - Grid_init.cc \ - stencil/Grid_lebesgue.cc \ - stencil/Grid_stencil_common.cc \ - algorithms/approx/Zolotarev.cc \ - algorithms/approx/Remez.cc \ - qcd/SpaceTimeGrid.cc\ - qcd/Dirac.cc\ - qcd/action/fermion/WilsonKernels.cc\ - qcd/action/fermion/WilsonKernelsHand.cc\ - qcd/action/fermion/WilsonFermion.cc\ - qcd/action/fermion/WilsonFermion5D.cc\ - qcd/action/fermion/CayleyFermion5D.cc \ - qcd/action/fermion/ContinuedFractionFermion5D.cc \ - $(extra_sources) -# 
qcd/action/fermion/PartialFractionFermion5D.cc \ +include Make.inc + +lib_LIBRARIES = libGrid.a +libGrid_a_SOURCES = $(CCFILES) $(extra_sources) + + +# qcd/action/fermion/PartialFractionFermion5D.cc\ \ # # Include files # -nobase_include_HEADERS=\ - ./algorithms/approx/bigfloat.h\ - ./algorithms/approx/bigfloat_double.h\ - ./algorithms/approx/Chebyshev.h\ - ./algorithms/approx/Remez.h\ - ./algorithms/approx/Zolotarev.h\ - ./algorithms/iterative/ConjugateGradient.h\ - ./algorithms/iterative/NormalEquations.h\ - ./algorithms/iterative/SchurRedBlack.h\ - ./algorithms/LinearOperator.h\ - ./algorithms/SparseMatrix.h\ - ./cartesian/Grid_cartesian_base.h\ - ./cartesian/Grid_cartesian_full.h\ - ./cartesian/Grid_cartesian_red_black.h\ - ./communicator/Grid_communicator_base.h\ - ./cshift/Grid_cshift_common.h\ - ./cshift/Grid_cshift_mpi.h\ - ./cshift/Grid_cshift_none.h\ - ./Grid.h\ - ./Grid_algorithms.h\ - ./Grid_aligned_allocator.h\ - ./Grid_cartesian.h\ - ./Grid_communicator.h\ - ./Grid_comparison.h\ - ./Grid_config.h\ - ./Grid_cshift.h\ - ./Grid_extract.h\ - ./Grid_lattice.h\ - ./Grid_math.h\ - ./Grid_simd.h\ - ./Grid_stencil.h\ - ./Grid_threads.h\ - ./lattice/Grid_lattice_arith.h\ - ./lattice/Grid_lattice_base.h\ - ./lattice/Grid_lattice_comparison.h\ - ./lattice/Grid_lattice_conformable.h\ - ./lattice/Grid_lattice_coordinate.h\ - ./lattice/Grid_lattice_ET.h\ - ./lattice/Grid_lattice_local.h\ - ./lattice/Grid_lattice_overload.h\ - ./lattice/Grid_lattice_peekpoke.h\ - ./lattice/Grid_lattice_reality.h\ - ./lattice/Grid_lattice_reduction.h\ - ./lattice/Grid_lattice_rng.h\ - ./lattice/Grid_lattice_trace.h\ - ./lattice/Grid_lattice_transfer.h\ - ./lattice/Grid_lattice_transpose.h\ - ./lattice/Grid_lattice_where.h\ - ./math/Grid_math_arith.h\ - ./math/Grid_math_arith_add.h\ - ./math/Grid_math_arith_mac.h\ - ./math/Grid_math_arith_mul.h\ - ./math/Grid_math_arith_scalar.h\ - ./math/Grid_math_arith_sub.h\ - ./math/Grid_math_inner.h\ - ./math/Grid_math_outer.h\ - 
./math/Grid_math_peek.h\ - ./math/Grid_math_poke.h\ - ./math/Grid_math_reality.h\ - ./math/Grid_math_tensors.h\ - ./math/Grid_math_trace.h\ - ./math/Grid_math_traits.h\ - ./math/Grid_math_transpose.h\ - ./parallelIO/GridNerscIO.h\ - ./qcd/action/Actions.h\ - ./qcd/action/fermion/FermionOperator.h\ - ./qcd/action/fermion/WilsonCompressor.h\ - ./qcd/action/fermion/WilsonKernels.h\ - ./qcd/action/fermion/WilsonFermion.h\ - ./qcd/action/fermion/WilsonFermion5D.h\ - ./qcd/Dirac.h\ - ./qcd/QCD.h\ - ./qcd/TwoSpinor.h\ - ./simd/Grid_avx.h\ - ./simd/Grid_avx512.h\ - ./simd/Grid_qpx.h\ - ./simd/Grid_sse4.h\ - ./simd/Grid_vector_types.h\ - ./simd/Old/Grid_vComplexD.h\ - ./simd/Old/Grid_vComplexF.h\ - ./simd/Old/Grid_vInteger.h\ - ./simd/Old/Grid_vRealD.h\ - ./simd/Old/Grid_vRealF.h\ - ./stencil/Grid_lebesgue.h +nobase_include_HEADERS=$(HFILES) diff --git a/lib/Grid_simd.h b/lib/Simd.h similarity index 100% rename from lib/Grid_simd.h rename to lib/Simd.h diff --git a/lib/Grid_stencil.h b/lib/Stencil.h similarity index 99% rename from lib/Grid_stencil.h rename to lib/Stencil.h index 50d22453..8529e73a 100644 --- a/lib/Grid_stencil.h +++ b/lib/Stencil.h @@ -1,7 +1,7 @@ #ifndef GRID_STENCIL_H #define GRID_STENCIL_H -#include // subdir aggregate +#include // subdir aggregate ////////////////////////////////////////////////////////////////////////////////////////// // Must not lose sight that goal is to be able to construct really efficient diff --git a/lib/Tensors.h b/lib/Tensors.h new file mode 100644 index 00000000..e812149f --- /dev/null +++ b/lib/Tensors.h @@ -0,0 +1,16 @@ +#ifndef GRID_MATH_H +#define GRID_MATH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/lib/Grid_threads.h b/lib/Threads.h similarity index 100% rename from lib/Grid_threads.h rename to lib/Threads.h diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Cartesian_base.h similarity index 99% rename from 
lib/cartesian/Grid_cartesian_base.h rename to lib/cartesian/Cartesian_base.h index 66339648..6303e38e 100644 --- a/lib/cartesian/Grid_cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -2,7 +2,6 @@ #define GRID_CARTESIAN_BASE_H #include -#include namespace Grid{ diff --git a/lib/cartesian/Grid_cartesian_full.h b/lib/cartesian/Cartesian_full.h similarity index 100% rename from lib/cartesian/Grid_cartesian_full.h rename to lib/cartesian/Cartesian_full.h diff --git a/lib/cartesian/Grid_cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h similarity index 100% rename from lib/cartesian/Grid_cartesian_red_black.h rename to lib/cartesian/Cartesian_red_black.h diff --git a/lib/communicator/Grid_communicator_base.h b/lib/communicator/Communicator_base.h similarity index 100% rename from lib/communicator/Grid_communicator_base.h rename to lib/communicator/Communicator_base.h diff --git a/lib/communicator/Grid_communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc similarity index 100% rename from lib/communicator/Grid_communicator_mpi.cc rename to lib/communicator/Communicator_mpi.cc diff --git a/lib/communicator/Grid_communicator_none.cc b/lib/communicator/Communicator_none.cc similarity index 100% rename from lib/communicator/Grid_communicator_none.cc rename to lib/communicator/Communicator_none.cc diff --git a/lib/cshift/Grid_cshift_common.h b/lib/cshift/Cshift_common.h similarity index 100% rename from lib/cshift/Grid_cshift_common.h rename to lib/cshift/Cshift_common.h diff --git a/lib/cshift/Grid_cshift_mpi.h b/lib/cshift/Cshift_mpi.h similarity index 100% rename from lib/cshift/Grid_cshift_mpi.h rename to lib/cshift/Cshift_mpi.h diff --git a/lib/cshift/Grid_cshift_none.h b/lib/cshift/Cshift_none.h similarity index 100% rename from lib/cshift/Grid_cshift_none.h rename to lib/cshift/Cshift_none.h diff --git a/lib/lattice/Grid_lattice_ET.h b/lib/lattice/Lattice_ET.h similarity index 100% rename from lib/lattice/Grid_lattice_ET.h rename to 
lib/lattice/Lattice_ET.h diff --git a/lib/lattice/Grid_lattice_arith.h b/lib/lattice/Lattice_arith.h similarity index 100% rename from lib/lattice/Grid_lattice_arith.h rename to lib/lattice/Lattice_arith.h diff --git a/lib/lattice/Grid_lattice_base.h b/lib/lattice/Lattice_base.h similarity index 93% rename from lib/lattice/Grid_lattice_base.h rename to lib/lattice/Lattice_base.h index 15ef0944..6b5fe261 100644 --- a/lib/lattice/Grid_lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -283,24 +283,23 @@ PARALLEL_FOR_LOOP -#include +#include #define GRID_LATTICE_EXPRESSION_TEMPLATES #ifdef GRID_LATTICE_EXPRESSION_TEMPLATES -#include +#include #else -#include +#include #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/lib/lattice/Grid_lattice_comparison.h b/lib/lattice/Lattice_comparison.h similarity index 100% rename from lib/lattice/Grid_lattice_comparison.h rename to lib/lattice/Lattice_comparison.h diff --git a/lib/lattice/Grid_lattice_conformable.h b/lib/lattice/Lattice_conformable.h similarity index 100% rename from lib/lattice/Grid_lattice_conformable.h rename to lib/lattice/Lattice_conformable.h diff --git a/lib/lattice/Grid_lattice_coordinate.h b/lib/lattice/Lattice_coordinate.h similarity index 100% rename from lib/lattice/Grid_lattice_coordinate.h rename to lib/lattice/Lattice_coordinate.h diff --git a/lib/lattice/Grid_lattice_local.h b/lib/lattice/Lattice_local.h similarity index 100% rename from lib/lattice/Grid_lattice_local.h rename to lib/lattice/Lattice_local.h diff --git a/lib/lattice/Grid_lattice_overload.h b/lib/lattice/Lattice_overload.h similarity index 100% rename from lib/lattice/Grid_lattice_overload.h rename to lib/lattice/Lattice_overload.h diff --git a/lib/lattice/Grid_lattice_peekpoke.h b/lib/lattice/Lattice_peekpoke.h similarity index 100% rename from 
lib/lattice/Grid_lattice_peekpoke.h rename to lib/lattice/Lattice_peekpoke.h diff --git a/lib/lattice/Grid_lattice_reality.h b/lib/lattice/Lattice_reality.h similarity index 100% rename from lib/lattice/Grid_lattice_reality.h rename to lib/lattice/Lattice_reality.h diff --git a/lib/lattice/Grid_lattice_reduction.h b/lib/lattice/Lattice_reduction.h similarity index 100% rename from lib/lattice/Grid_lattice_reduction.h rename to lib/lattice/Lattice_reduction.h diff --git a/lib/lattice/Grid_lattice_rng.h b/lib/lattice/Lattice_rng.h similarity index 100% rename from lib/lattice/Grid_lattice_rng.h rename to lib/lattice/Lattice_rng.h diff --git a/lib/lattice/Grid_lattice_trace.h b/lib/lattice/Lattice_trace.h similarity index 100% rename from lib/lattice/Grid_lattice_trace.h rename to lib/lattice/Lattice_trace.h diff --git a/lib/lattice/Grid_lattice_transfer.h b/lib/lattice/Lattice_transfer.h similarity index 100% rename from lib/lattice/Grid_lattice_transfer.h rename to lib/lattice/Lattice_transfer.h diff --git a/lib/lattice/Grid_lattice_transpose.h b/lib/lattice/Lattice_transpose.h similarity index 100% rename from lib/lattice/Grid_lattice_transpose.h rename to lib/lattice/Lattice_transpose.h diff --git a/lib/lattice/Grid_lattice_where.h b/lib/lattice/Lattice_where.h similarity index 100% rename from lib/lattice/Grid_lattice_where.h rename to lib/lattice/Lattice_where.h diff --git a/lib/math/Grid_math_arith.h b/lib/math/Grid_math_arith.h deleted file mode 100644 index ca90ba88..00000000 --- a/lib/math/Grid_math_arith.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef GRID_MATH_ARITH_H -#define GRID_MATH_ARITH_H - -#include -#include -#include -#include -#include - -#endif - diff --git a/lib/parallelIO/GridNerscIO.h b/lib/parallelIO/NerscIO.h similarity index 100% rename from lib/parallelIO/GridNerscIO.h rename to lib/parallelIO/NerscIO.h diff --git a/lib/qcd/action/fermion/PartialFractionFermion5D.cc b/lib/qcd/action/fermion/PartialFractionFermion5D.cc index 21f62a75..8b137891 
100644 --- a/lib/qcd/action/fermion/PartialFractionFermion5D.cc +++ b/lib/qcd/action/fermion/PartialFractionFermion5D.cc @@ -1,47 +1 @@ -#ifndef GRID_QCD_PARTIAL_FRACTION_H -#define GRID_QCD_PARTIAL_FRACTION_H -namespace Grid { - - namespace QCD { - - class PartialFractionFermion5D : public WilsonFermion5D - { - public: - - // override multiply - virtual RealD M (const LatticeFermion &in, LatticeFermion &out); - virtual RealD Mdag (const LatticeFermion &in, LatticeFermion &out); - - // half checkerboard operaions - virtual void Meooe (const LatticeFermion &in, LatticeFermion &out); - virtual void MeooeDag (const LatticeFermion &in, LatticeFermion &out); - virtual void Mooee (const LatticeFermion &in, LatticeFermion &out); - virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); - virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); - virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); - - private: - - zolotarev_data *zdata; - - // Part frac - double R=(1+this->mass)/(1-this->mass); - std::vector p; - std::vector q; - - // Constructors - PartialFractionFermion5D(LatticeGaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD M5); - - }; - - - } -} - -#endif diff --git a/lib/stencil/Grid_lebesgue.cc b/lib/stencil/Lebesgue.cc similarity index 100% rename from lib/stencil/Grid_lebesgue.cc rename to lib/stencil/Lebesgue.cc diff --git a/lib/stencil/Grid_lebesgue.h b/lib/stencil/Lebesgue.h similarity index 100% rename from lib/stencil/Grid_lebesgue.h rename to lib/stencil/Lebesgue.h diff --git a/lib/stencil/Grid_stencil_common.cc b/lib/stencil/Stencil_common.cc similarity index 100% rename from lib/stencil/Grid_stencil_common.cc rename to lib/stencil/Stencil_common.cc diff --git a/lib/tensors/Tensor_arith.h b/lib/tensors/Tensor_arith.h new file mode 100644 index 00000000..853a19a5 --- 
/dev/null +++ b/lib/tensors/Tensor_arith.h @@ -0,0 +1,11 @@ +#ifndef GRID_MATH_ARITH_H +#define GRID_MATH_ARITH_H + +#include +#include +#include +#include +#include + +#endif + diff --git a/lib/math/Grid_math_arith_add.h b/lib/tensors/Tensor_arith_add.h similarity index 100% rename from lib/math/Grid_math_arith_add.h rename to lib/tensors/Tensor_arith_add.h diff --git a/lib/math/Grid_math_arith_mac.h b/lib/tensors/Tensor_arith_mac.h similarity index 100% rename from lib/math/Grid_math_arith_mac.h rename to lib/tensors/Tensor_arith_mac.h diff --git a/lib/math/Grid_math_arith_mul.h b/lib/tensors/Tensor_arith_mul.h similarity index 100% rename from lib/math/Grid_math_arith_mul.h rename to lib/tensors/Tensor_arith_mul.h diff --git a/lib/math/Grid_math_arith_scalar.h b/lib/tensors/Tensor_arith_scalar.h similarity index 100% rename from lib/math/Grid_math_arith_scalar.h rename to lib/tensors/Tensor_arith_scalar.h diff --git a/lib/math/Grid_math_arith_sub.h b/lib/tensors/Tensor_arith_sub.h similarity index 100% rename from lib/math/Grid_math_arith_sub.h rename to lib/tensors/Tensor_arith_sub.h diff --git a/lib/math/Grid_math_tensors.h b/lib/tensors/Tensor_class.h similarity index 100% rename from lib/math/Grid_math_tensors.h rename to lib/tensors/Tensor_class.h diff --git a/lib/Grid_extract.h b/lib/tensors/Tensor_extract_merge.h similarity index 100% rename from lib/Grid_extract.h rename to lib/tensors/Tensor_extract_merge.h diff --git a/lib/math/Grid_math_inner.h b/lib/tensors/Tensor_inner.h similarity index 100% rename from lib/math/Grid_math_inner.h rename to lib/tensors/Tensor_inner.h diff --git a/lib/math/Grid_math_outer.h b/lib/tensors/Tensor_outer.h similarity index 100% rename from lib/math/Grid_math_outer.h rename to lib/tensors/Tensor_outer.h diff --git a/lib/math/Grid_math_peek.h b/lib/tensors/Tensor_peek.h similarity index 100% rename from lib/math/Grid_math_peek.h rename to lib/tensors/Tensor_peek.h diff --git a/lib/math/Grid_math_poke.h 
b/lib/tensors/Tensor_poke.h similarity index 100% rename from lib/math/Grid_math_poke.h rename to lib/tensors/Tensor_poke.h diff --git a/lib/math/Grid_math_reality.h b/lib/tensors/Tensor_reality.h similarity index 100% rename from lib/math/Grid_math_reality.h rename to lib/tensors/Tensor_reality.h diff --git a/lib/math/Grid_math_trace.h b/lib/tensors/Tensor_trace.h similarity index 100% rename from lib/math/Grid_math_trace.h rename to lib/tensors/Tensor_trace.h diff --git a/lib/math/Grid_math_traits.h b/lib/tensors/Tensor_traits.h similarity index 100% rename from lib/math/Grid_math_traits.h rename to lib/tensors/Tensor_traits.h diff --git a/lib/math/Grid_math_transpose.h b/lib/tensors/Tensor_transpose.h similarity index 100% rename from lib/math/Grid_math_transpose.h rename to lib/tensors/Tensor_transpose.h diff --git a/tests/Test_cshift.cc b/tests/Test_cshift.cc index 76125977..cc1da333 100644 --- a/tests/Test_cshift.cc +++ b/tests/Test_cshift.cc @@ -1,5 +1,4 @@ #include -#include using namespace Grid; using namespace Grid::QCD; diff --git a/tests/Test_cshift_red_black.cc b/tests/Test_cshift_red_black.cc index 212e9b6e..9ffa66b1 100644 --- a/tests/Test_cshift_red_black.cc +++ b/tests/Test_cshift_red_black.cc @@ -1,5 +1,4 @@ #include -#include using namespace Grid; using namespace Grid::QCD; diff --git a/tests/Test_gamma.cc b/tests/Test_gamma.cc index e803029b..bae77ecc 100644 --- a/tests/Test_gamma.cc +++ b/tests/Test_gamma.cc @@ -1,5 +1,4 @@ #include -#include using namespace std; using namespace Grid; diff --git a/tests/Test_nersc_io.cc b/tests/Test_nersc_io.cc index 6fe587a6..80d78291 100644 --- a/tests/Test_nersc_io.cc +++ b/tests/Test_nersc_io.cc @@ -1,5 +1,4 @@ #include -#include using namespace std; using namespace Grid; diff --git a/tests/Test_rng.cc b/tests/Test_rng.cc index 97f6c6b7..1731b740 100644 --- a/tests/Test_rng.cc +++ b/tests/Test_rng.cc @@ -1,5 +1,4 @@ #include -#include using namespace std; using namespace Grid; diff --git 
a/tests/Test_rng_fixed.cc b/tests/Test_rng_fixed.cc index 01e3315b..c836c93f 100644 --- a/tests/Test_rng_fixed.cc +++ b/tests/Test_rng_fixed.cc @@ -1,5 +1,4 @@ #include -#include using namespace std; using namespace Grid; diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index db600fe4..f4858d06 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -1,5 +1,4 @@ #include -#include using namespace std; using namespace Grid; From 54b56959f534d0a4ccf3bbaacda247528cb28228 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 12:47:46 +0100 Subject: [PATCH 10/22] Convenience script to build the list of headers and .cc files in the library --- scripts/filelist | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 scripts/filelist diff --git a/scripts/filelist b/scripts/filelist new file mode 100755 index 00000000..73c6db51 --- /dev/null +++ b/scripts/filelist @@ -0,0 +1,9 @@ +#!/bin/bash + +cd lib +HFILES=`find . -type f -name '*.h'` +CCFILES=`find . -type f -name '*.cc' -not -name '*ommunicator*.cc'` +echo HFILES=$HFILES > Make.inc +echo >> Make.inc +echo CCFILES=$CCFILES >> Make.inc +cd .. 
From 3254bb2c8ef0e1d5bd5ba869dd6941c75cd5a1e9 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 12:49:36 +0100 Subject: [PATCH 11/22] Make.inc needed in repo --- lib/Make.inc | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 lib/Make.inc diff --git a/lib/Make.inc b/lib/Make.inc new file mode 100644 index 00000000..31683256 --- /dev/null +++ b/lib/Make.inc @@ -0,0 +1,3 @@ +HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h 
./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h + +CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc From e68d0870105a920470deb741426cd9acd2b32d3e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 3 Jun 2015 13:07:00 +0100 Subject: [PATCH 12/22] Assist for generating file lists contained in Make.inc files for convenience when things are added --- benchmarks/Make.inc | 23 +++ benchmarks/Makefile.am | 23 +-- ...nchmark_su3_expr.cc => simple_su3_expr.cc} | 0 ...nchmark_su3_test.cc => simple_su3_test.cc} | 0 
lib/Make.inc | 1 + scripts/filelist | 49 +++++- tests/Make.inc | 83 +++++++++ tests/Makefile.am | 89 +--------- tests/Test_simd_new.cc | 165 ------------------ 9 files changed, 157 insertions(+), 276 deletions(-) create mode 100644 benchmarks/Make.inc rename benchmarks/{Benchmark_su3_expr.cc => simple_su3_expr.cc} (100%) rename benchmarks/{Benchmark_su3_test.cc => simple_su3_test.cc} (100%) create mode 100644 tests/Make.inc delete mode 100644 tests/Test_simd_new.cc diff --git a/benchmarks/Make.inc b/benchmarks/Make.inc new file mode 100644 index 00000000..9871149c --- /dev/null +++ b/benchmarks/Make.inc @@ -0,0 +1,23 @@ + +bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson + + +Benchmark_comms_SOURCES=Benchmark_comms.cc +Benchmark_comms_LDADD=-lGrid + + +Benchmark_dwf_SOURCES=Benchmark_dwf.cc +Benchmark_dwf_LDADD=-lGrid + + +Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc +Benchmark_memory_bandwidth_LDADD=-lGrid + + +Benchmark_su3_SOURCES=Benchmark_su3.cc +Benchmark_su3_LDADD=-lGrid + + +Benchmark_wilson_SOURCES=Benchmark_wilson.cc +Benchmark_wilson_LDADD=-lGrid + diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 74ff03c6..8081844f 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -5,25 +5,4 @@ AM_LDFLAGS = -L$(top_builddir)/lib # # Test code # -bin_PROGRAMS = \ - Benchmark_comms \ - Benchmark_memory_bandwidth \ - Benchmark_su3 \ - Benchmark_wilson \ - Benchmark_dwf - -Benchmark_comms_SOURCES = Benchmark_comms.cc -Benchmark_comms_LDADD = -lGrid - -Benchmark_memory_bandwidth_SOURCES = Benchmark_memory_bandwidth.cc -Benchmark_memory_bandwidth_LDADD = -lGrid - -Benchmark_su3_SOURCES = Benchmark_su3.cc Benchmark_su3_test.cc Benchmark_su3_expr.cc -Benchmark_su3_LDADD = -lGrid - -Benchmark_wilson_SOURCES = Benchmark_wilson.cc -Benchmark_wilson_LDADD = -lGrid - -Benchmark_dwf_SOURCES = Benchmark_dwf.cc -Benchmark_dwf_LDADD = -lGrid - +include Make.inc diff --git 
a/benchmarks/Benchmark_su3_expr.cc b/benchmarks/simple_su3_expr.cc similarity index 100% rename from benchmarks/Benchmark_su3_expr.cc rename to benchmarks/simple_su3_expr.cc diff --git a/benchmarks/Benchmark_su3_test.cc b/benchmarks/simple_su3_test.cc similarity index 100% rename from benchmarks/Benchmark_su3_test.cc rename to benchmarks/simple_su3_test.cc diff --git a/lib/Make.inc b/lib/Make.inc index 31683256..dd5269ac 100644 --- a/lib/Make.inc +++ b/lib/Make.inc @@ -1,3 +1,4 @@ + HFILES=./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/SchurRedBlack.h ./algorithms/LinearOperator.h ./algorithms/SparseMatrix.h ./Algorithms.h ./AlignedAllocator.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./Cartesian.h ./communicator/Communicator_base.h ./Communicator.h ./Comparison.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./Cshift.h ./Grid.h ./GridConfig.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h ./lattice/Lattice_ET.h ./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_where.h ./Lattice.h ./parallelIO/NerscIO.h ./qcd/action/Actions.h ./qcd/action/DiffAction.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h 
./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/Dirac.h ./qcd/LinalgUtils.h ./qcd/QCD.h ./qcd/SpaceTimeGrid.h ./qcd/TwoSpinor.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Old/Grid_vComplexD.h ./simd/Old/Grid_vComplexF.h ./simd/Old/Grid_vInteger.h ./simd/Old/Grid_vRealD.h ./simd/Old/Grid_vRealF.h ./Simd.h ./stencil/Lebesgue.h ./Stencil.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_inner.h ./tensors/Tensor_outer.h ./tensors/Tensor_peek.h ./tensors/Tensor_poke.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./Tensors.h ./Threads.h CCFILES=./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./GridInit.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/Dirac.cc ./qcd/SpaceTimeGrid.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/scripts/filelist b/scripts/filelist index 73c6db51..1b575611 100755 --- a/scripts/filelist +++ b/scripts/filelist @@ -1,9 +1,56 @@ #!/bin/bash cd lib + HFILES=`find . -type f -name '*.h'` CCFILES=`find . 
-type f -name '*.cc' -not -name '*ommunicator*.cc'` -echo HFILES=$HFILES > Make.inc +echo> Make.inc +echo HFILES=$HFILES >> Make.inc echo >> Make.inc echo CCFILES=$CCFILES >> Make.inc + cd .. + + + +cd tests + +echo> Make.inc +TESTS=`ls T*.cc` +TESTLIST=`echo ${TESTS} | sed s/.cc//g ` + +echo > Make.inc +echo bin_PROGRAMS = ${TESTLIST} >> Make.inc +echo >> Make.inc + +for f in $TESTS +do +BNAME=`basename $f .cc` +echo >> Make.inc +echo ${BNAME}_SOURCES=$f >> Make.inc +echo ${BNAME}_LDADD=-lGrid>> Make.inc +echo >> Make.inc +done + +cd .. + + +cd benchmarks + + +echo> Make.inc +TESTS=`ls B*.cc` +TESTLIST=`echo ${TESTS} | sed s/.cc//g ` + +echo > Make.inc +echo bin_PROGRAMS = ${TESTLIST} >> Make.inc +echo >> Make.inc + +for f in $TESTS +do +BNAME=`basename $f .cc` +echo >> Make.inc +echo ${BNAME}_SOURCES=$f >> Make.inc +echo ${BNAME}_LDADD=-lGrid>> Make.inc +echo >> Make.inc +done diff --git a/tests/Make.inc b/tests/Make.inc new file mode 100644 index 00000000..d592f218 --- /dev/null +++ b/tests/Make.inc @@ -0,0 +1,83 @@ + +bin_PROGRAMS = Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_many_cg Test_many_evenodd Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_evenodd + + +Test_cshift_SOURCES=Test_cshift.cc +Test_cshift_LDADD=-lGrid + + +Test_cshift_red_black_SOURCES=Test_cshift_red_black.cc +Test_cshift_red_black_LDADD=-lGrid + + +Test_dwf_cg_prec_SOURCES=Test_dwf_cg_prec.cc +Test_dwf_cg_prec_LDADD=-lGrid + + +Test_dwf_cg_schur_SOURCES=Test_dwf_cg_schur.cc +Test_dwf_cg_schur_LDADD=-lGrid + + +Test_dwf_cg_unprec_SOURCES=Test_dwf_cg_unprec.cc +Test_dwf_cg_unprec_LDADD=-lGrid + + +Test_dwf_even_odd_SOURCES=Test_dwf_even_odd.cc +Test_dwf_even_odd_LDADD=-lGrid + + +Test_gamma_SOURCES=Test_gamma.cc +Test_gamma_LDADD=-lGrid + + +Test_main_SOURCES=Test_main.cc +Test_main_LDADD=-lGrid + + 
+Test_many_cg_SOURCES=Test_many_cg.cc +Test_many_cg_LDADD=-lGrid + + +Test_many_evenodd_SOURCES=Test_many_evenodd.cc +Test_many_evenodd_LDADD=-lGrid + + +Test_nersc_io_SOURCES=Test_nersc_io.cc +Test_nersc_io_LDADD=-lGrid + + +Test_remez_SOURCES=Test_remez.cc +Test_remez_LDADD=-lGrid + + +Test_rng_SOURCES=Test_rng.cc +Test_rng_LDADD=-lGrid + + +Test_rng_fixed_SOURCES=Test_rng_fixed.cc +Test_rng_fixed_LDADD=-lGrid + + +Test_simd_SOURCES=Test_simd.cc +Test_simd_LDADD=-lGrid + + +Test_stencil_SOURCES=Test_stencil.cc +Test_stencil_LDADD=-lGrid + + +Test_wilson_cg_prec_SOURCES=Test_wilson_cg_prec.cc +Test_wilson_cg_prec_LDADD=-lGrid + + +Test_wilson_cg_schur_SOURCES=Test_wilson_cg_schur.cc +Test_wilson_cg_schur_LDADD=-lGrid + + +Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc +Test_wilson_cg_unprec_LDADD=-lGrid + + +Test_wilson_evenodd_SOURCES=Test_wilson_evenodd.cc +Test_wilson_evenodd_LDADD=-lGrid + diff --git a/tests/Makefile.am b/tests/Makefile.am index bb3448f1..83385001 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -2,91 +2,4 @@ AM_CXXFLAGS = -I$(top_srcdir)/lib AM_LDFLAGS = -L$(top_builddir)/lib -# -# Test code -# -bin_PROGRAMS = Test_main \ - Test_simd \ - Test_gamma \ - Test_cshift \ - Test_cshift_red_black \ - Test_stencil \ - Test_nersc_io \ - Test_rng \ - Test_remez \ - Test_rng_fixed \ - Test_wilson_evenodd \ - Test_wilson_cg_unprec \ - Test_wilson_cg_prec \ - Test_wilson_cg_schur \ - Test_dwf_even_odd\ - Test_dwf_cg_unprec\ - Test_dwf_cg_prec\ - Test_dwf_cg_schur\ - Test_many_evenodd\ - Test_many_cg - - - -Test_main_SOURCES = Test_main.cc -Test_main_LDADD = -lGrid - -Test_rng_SOURCES = Test_rng.cc -Test_rng_LDADD = -lGrid - -Test_rng_fixed_SOURCES = Test_rng_fixed.cc -Test_rng_fixed_LDADD = -lGrid - -Test_remez_SOURCES = Test_remez.cc -Test_remez_LDADD = -lGrid - -Test_nersc_io_SOURCES = Test_nersc_io.cc -Test_nersc_io_LDADD = -lGrid - -Test_cshift_SOURCES = Test_cshift.cc -Test_cshift_LDADD = -lGrid - -Test_cshift_red_black_SOURCES = 
Test_cshift_red_black.cc -Test_cshift_red_black_LDADD = -lGrid - -Test_gamma_SOURCES = Test_gamma.cc -Test_gamma_LDADD = -lGrid - -Test_stencil_SOURCES = Test_stencil.cc -Test_stencil_LDADD = -lGrid - -Test_simd_SOURCES = Test_simd.cc -Test_simd_LDADD = -lGrid - -#Test_simd_new_SOURCES = Test_simd_new.cc -#Test_simd_new_LDADD = -lGrid - -Test_wilson_evenodd_SOURCES = Test_wilson_evenodd.cc -Test_wilson_evenodd_LDADD = -lGrid - -Test_wilson_cg_unprec_SOURCES = Test_wilson_cg_unprec.cc -Test_wilson_cg_unprec_LDADD = -lGrid - -Test_wilson_cg_prec_SOURCES = Test_wilson_cg_prec.cc -Test_wilson_cg_prec_LDADD = -lGrid - -Test_wilson_cg_schur_SOURCES = Test_wilson_cg_schur.cc -Test_wilson_cg_schur_LDADD = -lGrid - -Test_dwf_even_odd_SOURCES = Test_dwf_even_odd.cc -Test_dwf_even_odd_LDADD = -lGrid - -Test_dwf_cg_unprec_SOURCES = Test_dwf_cg_unprec.cc -Test_dwf_cg_unprec_LDADD = -lGrid - -Test_dwf_cg_prec_SOURCES = Test_dwf_cg_prec.cc -Test_dwf_cg_prec_LDADD = -lGrid - -Test_dwf_cg_schur_SOURCES = Test_dwf_cg_schur.cc -Test_dwf_cg_schur_LDADD = -lGrid - -Test_many_evenodd_SOURCES = Test_many_evenodd.cc -Test_many_evenodd_LDADD = -lGrid - -Test_many_cg_SOURCES = Test_many_cg.cc -Test_many_cg_LDADD = -lGrid +include Make.inc diff --git a/tests/Test_simd_new.cc b/tests/Test_simd_new.cc deleted file mode 100644 index 41781304..00000000 --- a/tests/Test_simd_new.cc +++ /dev/null @@ -1,165 +0,0 @@ -#include -#include "simd/Grid_vector_types.h" -#include - -using namespace std; -using namespace Grid; -using namespace Grid::QCD; - -class funcPlus { -public: - funcPlus() {}; - template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;} - std::string name(void) const { return std::string("Plus"); } -}; -class funcMinus { -public: - funcMinus() {}; - template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;} - std::string name(void) const { return std::string("Minus"); } -}; -class funcTimes { -public: - funcTimes() {}; - template void operator()(vec &rr,vec 
&i1,vec &i2) const { rr = i1*i2;} - std::string name(void) const { return std::string("Times"); } -}; -class funcConj { -public: - funcConj() {}; - template void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);} - std::string name(void) const { return std::string("Conj"); } -}; -class funcAdj { -public: - funcAdj() {}; - template void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);} - std::string name(void) const { return std::string("Adj"); } -}; - -class funcTimesI { -public: - funcTimesI() {}; - template void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);} - std::string name(void) const { return std::string("timesI"); } -}; - -class funcTimesMinusI { -public: - funcTimesMinusI() {}; - template void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);} - std::string name(void) const { return std::string("timesMinusI"); } -}; - -template -void Tester(const functor &func) -{ - GridSerialRNG sRNG; - sRNG.SeedRandomDevice(); - - int Nsimd = vec::Nsimd(); - - std::vector input1(Nsimd); - std::vector input2(Nsimd); - std::vector result(Nsimd); - std::vector reference(Nsimd); - - std::vector > buf(3); - vec & v_input1 = buf[0]; - vec & v_input2 = buf[1]; - vec & v_result = buf[2]; - - - for(int i=0;i(v_input1,input1); - merge(v_input2,input2); - merge(v_result,result); - - func(v_result,v_input1,v_input2); - - for(int i=0;i(v_result,result); - std::cout << " " << func.name()<0){ - std::cout<< "*****" << std::endl; - std::cout<< "["< latt_size = GridDefaultLatt(); - std::vector simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); - - GridCartesian Grid(latt_size,simd_layout,mpi_layout); - std::vector seeds({1,2,3,4}); - - // Insist that operations on random scalars gives - // identical results to on vectors. 
- - std::cout << "==================================="<< std::endl; - std::cout << "Testing MyComplexF "<(funcTimesI()); - Tester(funcTimesMinusI()); - Tester(funcPlus()); - Tester(funcMinus()); - Tester(funcTimes()); - Tester(funcConj()); - Tester(funcAdj()); - - std::cout << "==================================="<< std::endl; - std::cout << "Testing MyComplexD "<(funcTimesI()); - Tester(funcTimesMinusI()); - Tester(funcPlus()); - Tester(funcMinus()); - Tester(funcTimes()); - Tester(funcConj()); - Tester(funcAdj()); - - std::cout << "==================================="<< std::endl; - std::cout << "Testing MyRealF "<(funcPlus()); - Tester(funcMinus()); - Tester(funcTimes()); - Tester(funcAdj()); - - std::cout << "==================================="<< std::endl; - std::cout << "Testing MyRealD "<(funcPlus()); - Tester(funcMinus()); - Tester(funcTimes()); - Tester(funcAdj()); - - Grid_finalize(); -} From 802e94e9ca566f3272972add50319931ba1d3ef1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 4 Jun 2015 00:00:45 +0100 Subject: [PATCH 13/22] First pass at continued fraction; solver and even odd decomposition tests pass. Have to make ContFrac class virtual and derive end non-abstract actions for the particular cases. 
--- lib/qcd/action/fermion/CayleyFermion5D.cc | 11 +- lib/qcd/action/fermion/CayleyFermion5D.h | 5 +- .../fermion/ContinuedFractionFermion5D.cc | 81 +++++-- .../fermion/ContinuedFractionFermion5D.h | 34 +-- lib/qcd/action/fermion/DomainWallFermion.h | 3 +- lib/qcd/action/fermion/MobiusFermion.h | 3 +- .../action/fermion/MobiusZolotarevFermion.h | 4 +- tests/InvSqrt.gnu | 0 tests/Make.inc | 30 ++- tests/Sqrt.gnu | 2 - tests/{Test_many_cg.cc => Test_cayley_cg.cc} | 0 ...any_evenodd.cc => Test_cayley_even_odd.cc} | 0 tests/Test_contfrac_cg.cc | 147 ++++++++++++ tests/Test_contfrac_even_odd.cc | 218 ++++++++++++++++++ ...son_evenodd.cc => Test_wilson_even_odd.cc} | 0 15 files changed, 490 insertions(+), 48 deletions(-) delete mode 100644 tests/InvSqrt.gnu delete mode 100644 tests/Sqrt.gnu rename tests/{Test_many_cg.cc => Test_cayley_cg.cc} (100%) rename tests/{Test_many_evenodd.cc => Test_cayley_even_odd.cc} (100%) create mode 100644 tests/Test_contfrac_cg.cc create mode 100644 tests/Test_contfrac_even_odd.cc rename tests/{Test_wilson_evenodd.cc => Test_wilson_even_odd.cc} (100%) diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index be528e79..e47ff331 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -229,7 +229,14 @@ namespace QCD { } } - void CayleyFermion5D::SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c) + // Tanh + void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) + { + SetCoefficientsZolotarev(1.0,zdata,b,c); + + } + //Zolo + void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) { /////////////////////////////////////////////////////////// @@ -266,7 +273,7 @@ namespace QCD { double bmc = b-c; for(int i=0; i < Ls; i++){ as[i] = 1.0; - omega[i] = ((double)zdata->gamma[i]); //NB reciprocal relative to Chroma NEF code + omega[i] = 
((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code bs[i] = 0.5*(bpc/omega[i] + bmc); cs[i] = 0.5*(bpc/omega[i] - bmc); } diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index 57c71992..e2175d77 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -20,7 +20,7 @@ namespace Grid { virtual void MooeeDag (const LatticeFermion &in, LatticeFermion &out); virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); - + virtual void Instantiatable(void)=0; // protected: RealD mass; @@ -52,7 +52,8 @@ namespace Grid { RealD _mass,RealD _M5); protected: - void SetCoefficients(RealD scale,Approx::zolotarev_data *zdata,RealD b,RealD c); + void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); + void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); }; } diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc index c281b486..250e365f 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc @@ -1,9 +1,56 @@ #include namespace Grid { - namespace QCD { + void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) + { + SetCoefficientsZolotarev(1.0,zdata,b,c); + } + void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) + { + R=(1+this->mass)/(1-this->mass); + + Beta.resize(Ls); + cc.resize(Ls); + cc_d.resize(Ls); + sqrt_cc.resize(Ls); + for(int i=0; i < Ls ; i++){ + Beta[i] = zdata -> beta[i]; + cc[i] = 1.0/Beta[i]; + cc_d[i]=sqrt(cc[i]); + } + + cc_d[Ls-1]=1.0; + for(int i=0; i < Ls-1 ; i++){ + sqrt_cc[i]= sqrt(cc[i]*cc[i+1]); + } + sqrt_cc[Ls-2]=sqrt(cc[Ls-2]); + + + ZoloHiInv 
=1.0/zolo_hi; + double dw_diag = (4.0-M5)*ZoloHiInv; + + See.resize(Ls); + Aee.resize(Ls); + int sign=1; + for(int s=0;sM5)*scale; + double dw_diag = (4.0-M5)*ZoloHiInv; int sign=1; for(int s=0;smass)/(1-this->mass); + double R=(1+mass)/(1-mass); ag5xpby_ssp(chi,Beta[s]*dw_diag,psi,sqrt_cc[s-1],psi,s,s-1); ag5xpby_ssp(chi,R,psi,1.0,chi,s,s); } else { @@ -80,7 +131,7 @@ namespace Grid { void ContinuedFractionFermion5D::MooeeInv (const LatticeFermion &psi, LatticeFermion &chi) { // Apply Linv - axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); + axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); for(int s=1;sLs-1,this->Ls-1); + axpby_ssp(chi,1.0/cc_d[Ls-1],chi,0.0,chi,Ls-1,Ls-1); for(int s=Ls-2;s>=0;s--){ axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1); } @@ -112,6 +163,10 @@ namespace Grid { FourDimGrid, FourDimRedBlackGrid,M5), mass(_mass) { + assert((Ls&0x1)==1); // Odd Ls required + int nrational=Ls-1;// Even rational order + zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham + SetCoefficientsTanh(zdata,1.0,0.0); } } diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h index 7f5c022a..99365009 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -21,20 +21,8 @@ namespace Grid { virtual void MooeeInv (const LatticeFermion &in, LatticeFermion &out); virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); - private: - - Approx::zolotarev_data *zdata; - - // Cont frac - RealD mass; - RealD R; - RealD scale; - std::vector Beta; - std::vector cc;; - std::vector cc_d;; - std::vector sqrt_cc; - std::vector See; - std::vector Aee; + // virtual void Instantiatable(void)=0; + virtual void Instantiatable(void) {}; // Constructors ContinuedFractionFermion5D(LatticeGaugeField &_Umu, @@ -44,6 +32,24 @@ namespace Grid { GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD M5); + 
protected: + + void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); + void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c); + + Approx::zolotarev_data *zdata; + + // Cont frac + RealD mass; + RealD R; + RealD ZoloHiInv; + std::vector Beta; + std::vector cc;; + std::vector cc_d;; + std::vector sqrt_cc; + std::vector See; + std::vector Aee; + }; diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h index 3e6a9739..a25c0c3c 100644 --- a/lib/qcd/action/fermion/DomainWallFermion.h +++ b/lib/qcd/action/fermion/DomainWallFermion.h @@ -11,6 +11,7 @@ namespace Grid { { public: + virtual void Instantiatable(void) {}; // Constructors DomainWallFermion(LatticeGaugeField &_Umu, GridCartesian &FiveDimGrid, @@ -33,7 +34,7 @@ namespace Grid { std::cout << "DomainWallFermion with Ls="<CayleyFermion5D::SetCoefficients(1.0,zdata,1.0,0.0); + this->CayleyFermion5D::SetCoefficientsTanh(zdata,1.0,0.0); } diff --git a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h index 4c291fad..33f94089 100644 --- a/lib/qcd/action/fermion/MobiusFermion.h +++ b/lib/qcd/action/fermion/MobiusFermion.h @@ -11,6 +11,7 @@ namespace Grid { { public: + virtual void Instantiatable(void) {}; // Constructors MobiusFermion(LatticeGaugeField &_Umu, GridCartesian &FiveDimGrid, @@ -34,7 +35,7 @@ namespace Grid { assert(zdata->n==this->Ls); // Call base setter - this->CayleyFermion5D::SetCoefficients(1.0,zdata,b,c); + this->CayleyFermion5D::SetCoefficientsTanh(zdata,b,c); } diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h index 9ac795d9..1be61601 100644 --- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h +++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h @@ -11,6 +11,7 @@ namespace Grid { { public: + virtual void Instantiatable(void) {}; // Constructors MobiusZolotarevFermion(LatticeGaugeField &_Umu, GridCartesian 
&FiveDimGrid, @@ -34,10 +35,9 @@ namespace Grid { assert(zdata->n==this->Ls); std::cout << "MobiusZolotarevFermion (b="<CayleyFermion5D::SetCoefficients(1.0,zdata,b,c); + this->CayleyFermion5D::SetCoefficientsZolotarev(hi,zdata,b,c); } diff --git a/tests/InvSqrt.gnu b/tests/InvSqrt.gnu deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/Make.inc b/tests/Make.inc index d592f218..b525874d 100644 --- a/tests/Make.inc +++ b/tests/Make.inc @@ -1,5 +1,21 @@ -bin_PROGRAMS = Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_many_cg Test_many_evenodd Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_evenodd +bin_PROGRAMS = Test_cayley_cg Test_cayley_even_odd Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_even_odd Test_gamma Test_main Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_even_odd + + +Test_cayley_cg_SOURCES=Test_cayley_cg.cc +Test_cayley_cg_LDADD=-lGrid + + +Test_cayley_even_odd_SOURCES=Test_cayley_even_odd.cc +Test_cayley_even_odd_LDADD=-lGrid + + +Test_contfrac_cg_SOURCES=Test_contfrac_cg.cc +Test_contfrac_cg_LDADD=-lGrid + + +Test_contfrac_even_odd_SOURCES=Test_contfrac_even_odd.cc +Test_contfrac_even_odd_LDADD=-lGrid Test_cshift_SOURCES=Test_cshift.cc @@ -34,14 +50,6 @@ Test_main_SOURCES=Test_main.cc Test_main_LDADD=-lGrid -Test_many_cg_SOURCES=Test_many_cg.cc -Test_many_cg_LDADD=-lGrid - - -Test_many_evenodd_SOURCES=Test_many_evenodd.cc -Test_many_evenodd_LDADD=-lGrid - - Test_nersc_io_SOURCES=Test_nersc_io.cc Test_nersc_io_LDADD=-lGrid @@ -78,6 +86,6 @@ Test_wilson_cg_unprec_SOURCES=Test_wilson_cg_unprec.cc Test_wilson_cg_unprec_LDADD=-lGrid 
-Test_wilson_evenodd_SOURCES=Test_wilson_evenodd.cc -Test_wilson_evenodd_LDADD=-lGrid +Test_wilson_even_odd_SOURCES=Test_wilson_even_odd.cc +Test_wilson_even_odd_LDADD=-lGrid diff --git a/tests/Sqrt.gnu b/tests/Sqrt.gnu deleted file mode 100644 index ae56ab97..00000000 --- a/tests/Sqrt.gnu +++ /dev/null @@ -1,2 +0,0 @@ -f(x) = 6.81384+(-2.34645e-06/(x+0.000228091))+(-1.51593e-05/(x+0.00112084))+(-6.89254e-05/(x+0.003496))+(-0.000288983/(x+0.00954309))+(-0.00119277/(x+0.024928))+(-0.0050183/(x+0.0646627))+(-0.0226449/(x+0.171576))+(-0.123767/(x+0.491792))+(-1.1705/(x+1.78667))+(-102.992/(x+18.4866)); -f(x) = 0.14676+(0.00952992/(x+5.40933e-05))+(0.0115952/(x+0.000559699))+(0.0161824/(x+0.00203338))+(0.0243252/(x+0.00582831))+(0.0379533/(x+0.0154649))+(0.060699/(x+0.0401156))+(0.100345/(x+0.104788))+(0.178335/(x+0.286042))+(0.381586/(x+0.892189))+(1.42625/(x+4.38422)); diff --git a/tests/Test_many_cg.cc b/tests/Test_cayley_cg.cc similarity index 100% rename from tests/Test_many_cg.cc rename to tests/Test_cayley_cg.cc diff --git a/tests/Test_many_evenodd.cc b/tests/Test_cayley_even_odd.cc similarity index 100% rename from tests/Test_many_evenodd.cc rename to tests/Test_cayley_even_odd.cc diff --git a/tests/Test_contfrac_cg.cc b/tests/Test_contfrac_cg.cc new file mode 100644 index 00000000..7fa0d6fc --- /dev/null +++ b/tests/Test_contfrac_cg.cc @@ -0,0 +1,147 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + +template +void TestCGinversions(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); +template +void TestCGschur(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, 
GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + +template +void TestCGunprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + +template +void TestCGprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout << "Grid is setup to use "< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + std::vector U(4,UGrid); + + RealD mass=0.1; + RealD M5 =1.8; + std::cout <<"ContinuedFractionFermion test"<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + Grid_finalize(); +} +template +void TestCGinversions(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + std::cout << "Testing unpreconditioned inverter"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); + std::cout << "Testing red black preconditioned inverter"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); + std::cout << "Testing red black Schur inverter"<(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); +} + +template +void TestCGunprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + LatticeFermion src (FGrid); random(*RNG5,src); + 
LatticeFermion result(FGrid); result=zero; + + HermitianOperator HermOp(Ddwf); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOp,src,result); + +} +template +void TestCGprec(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion src_o(FrbGrid); + LatticeFermion result_o(FrbGrid); + pickCheckerboard(Odd,src_o,src); + result_o=zero; + + HermitianCheckerBoardedOperator HermOpEO(Ddwf); + ConjugateGradient CG(1.0e-8,10000); + CG(HermOpEO,src_o,result_o); +} + + +template +void TestCGschur(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion result(FGrid); result=zero; + + ConjugateGradient CG(1.0e-8,10000); + SchurRedBlackSolve SchurSolver(CG); + SchurSolver(Ddwf,src,result); +} diff --git a/tests/Test_contfrac_even_odd.cc b/tests/Test_contfrac_even_odd.cc new file mode 100644 index 00000000..801bd955 --- /dev/null +++ b/tests/Test_contfrac_even_odd.cc @@ -0,0 +1,218 @@ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + +template +void TestWhat(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, GridParallelRNG *RNG5); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout << "Grid is setup to use "< seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + 
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); random(RNG4,Umu); + std::vector U(4,UGrid); + + RealD mass=0.1; + RealD M5 =1.8; + std::cout <<"ContinuedFractionFermion test"<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + Grid_finalize(); +} + +template +void TestWhat(What & Ddwf, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + + LatticeFermion src (FGrid); random(*RNG5,src); + LatticeFermion phi (FGrid); random(*RNG5,phi); + LatticeFermion chi (FGrid); random(*RNG5,chi); + LatticeFermion result(FGrid); result=zero; + LatticeFermion ref(FGrid); ref=zero; + LatticeFermion tmp(FGrid); tmp=zero; + LatticeFermion err(FGrid); tmp=zero; + + LatticeFermion src_e (FrbGrid); + LatticeFermion src_o (FrbGrid); + LatticeFermion r_e (FrbGrid); + LatticeFermion r_o (FrbGrid); + LatticeFermion r_eo (FGrid); + LatticeFermion r_eeoo(FGrid); + + std::cout<<"=========================================================="< * = < chi | Deo^dag| phi> "< Date: Thu, 4 Jun 2015 00:23:16 +0100 Subject: [PATCH 14/22] Implementing the Hw kernel continued fraction 5d overlap cases --- lib/qcd/action/Actions.h | 2 + .../fermion/ContinuedFractionFermion5D.cc | 9 ++-- .../fermion/ContinuedFractionFermion5D.h | 6 +-- .../OverlapWilsonContfracTanhFermion.h | 39 ++++++++++++++++ .../OverlapWilsonContfracZolotarevFermion.h | 44 +++++++++++++++++++ 5 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h create mode 100644 lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index 8a8c4642..b31e9136 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -55,6 +55,8 @@ // Continued 
fraction ////////////////////// #include +#include +#include ////////////////////// // Partial fraction diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc index 250e365f..92f6473e 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.cc @@ -3,11 +3,11 @@ namespace Grid { namespace QCD { - void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) + void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale) { - SetCoefficientsZolotarev(1.0,zdata,b,c); + SetCoefficientsZolotarev(1.0/scale,zdata); } - void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) + void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata) { R=(1+this->mass)/(1-this->mass); @@ -164,9 +164,6 @@ namespace Grid { mass(_mass) { assert((Ls&0x1)==1); // Odd Ls required - int nrational=Ls-1;// Even rational order - zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham - SetCoefficientsTanh(zdata,1.0,0.0); } } diff --git a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h index 99365009..f363878f 100644 --- a/lib/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/lib/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -22,7 +22,7 @@ namespace Grid { virtual void MooeeInvDag (const LatticeFermion &in, LatticeFermion &out); // virtual void Instantiatable(void)=0; - virtual void Instantiatable(void) {}; + virtual void Instantiatable(void) =0; // Constructors ContinuedFractionFermion5D(LatticeGaugeField &_Umu, @@ -34,8 +34,8 @@ namespace Grid { protected: - void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); - void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD 
b,RealD c); + void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale); + void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);; Approx::zolotarev_data *zdata; diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h new file mode 100644 index 00000000..4865f169 --- /dev/null +++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h @@ -0,0 +1,39 @@ +#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H +#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D + { + public: + + virtual void Instantiatable(void){}; + // Constructors + OverlapWilsonContFracTanhFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale) : + + // b+c=scale, b-c = 0 <=> b =c = scale/2 + ContinuedFractionFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass) + { + assert((Ls&0x1)==1); // Odd Ls required + int nrational=Ls-1;// Even rational order + zdata = Approx::grid_higham(1.0,nrational);// eps is ignored for higham + SetCoefficientsTanh(zdata,scale); + } + }; + } +} +#endif diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h new file mode 100644 index 00000000..7478c062 --- /dev/null +++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h @@ -0,0 +1,44 @@ +#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H +#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D + { + public: + + virtual void Instantiatable(void){}; + // Constructors + 
OverlapWilsonContFracZolotarevFermion(LatticeGaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo,RealD hi): + + // b+c=scale, b-c = 0 <=> b =c = scale/2 + ContinuedFractionFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass) + { + assert((Ls&0x1)==1); // Odd Ls required + + int nrational=Ls-1;// Even rational order + RealD eps = lo/hi; + + Approx::zolotarev_data *zdata = Approx::grid_zolotarev(eps,nrational,0); + + SetCoefficientsZolotarev(hi,zdata); + + } + }; + } +} +#endif From 9c1ab656d452f7d06674e721406af250b31422a4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 4 Jun 2015 06:02:00 +0100 Subject: [PATCH 15/22] CG Tests work for wilson kernel cont frac zolo and tanh --- .../fermion/OverlapWilsonContfracTanhFermion.h | 12 ++++++------ .../fermion/OverlapWilsonContfracZolotarevFermion.h | 6 +++--- tests/Test_contfrac_cg.cc | 12 +++++++++--- tests/Test_contfrac_even_odd.cc | 11 ++++++++--- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h index 4865f169..ed0c24dc 100644 --- a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h @@ -1,5 +1,5 @@ -#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H -#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H +#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H +#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #include @@ -23,10 +23,10 @@ namespace Grid { // b+c=scale, b-c = 0 <=> b =c = scale/2 ContinuedFractionFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass) + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5) { assert((Ls&0x1)==1); // Odd Ls required int 
nrational=Ls-1;// Even rational order diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h index 7478c062..caf01133 100644 --- a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h @@ -1,5 +1,5 @@ -#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H -#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H +#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H +#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #include @@ -26,7 +26,7 @@ namespace Grid { FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, - FourDimRedBlackGrid,_mass) + FourDimRedBlackGrid,_mass,_M5) { assert((Ls&0x1)==1); // Odd Ls required diff --git a/tests/Test_contfrac_cg.cc b/tests/Test_contfrac_cg.cc index 7fa0d6fc..83475254 100644 --- a/tests/Test_contfrac_cg.cc +++ b/tests/Test_contfrac_cg.cc @@ -72,9 +72,15 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5 =1.8; - std::cout <<"ContinuedFractionFermion test"<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + + std::cout <<"OverlapWilsonContFracTanhFermion test"<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"OverlapWilsonContFracZolotarevFermion test"<(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } diff --git a/tests/Test_contfrac_even_odd.cc b/tests/Test_contfrac_even_odd.cc index 801bd955..e13c1189 100644 --- a/tests/Test_contfrac_even_odd.cc +++ b/tests/Test_contfrac_even_odd.cc @@ -48,9 +48,14 @@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5 =1.8; - std::cout <<"ContinuedFractionFermion test"<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"OverlapWilsonContFracTanhFermion test"<(Dcf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout <<"OverlapWilsonContFracZolotarevFermion test"<(Dcfz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); Grid_finalize(); } From 
c6f2ee91f67cb6da67f2511afc8c0ef018bf6f09 Mon Sep 17 00:00:00 2001 From: neo Date: Thu, 4 Jun 2015 14:17:58 +0900 Subject: [PATCH 16/22] Small modification to the configure files --- INSTALL | 2 +- TODO | 3 + configure | 13 +- docs/doxy.cfg.test | 2305 ---------------------------------- lib/simd/Grid_avx.h | 10 +- lib/simd/Grid_vector_types.h | 3 +- 6 files changed, 13 insertions(+), 2323 deletions(-) delete mode 100644 docs/doxy.cfg.test diff --git a/INSTALL b/INSTALL index 80a61507..f812f5a2 120000 --- a/INSTALL +++ b/INSTALL @@ -1 +1 @@ -/opt/local/share/automake-1.15/INSTALL \ No newline at end of file +/usr/share/automake-1.14/INSTALL \ No newline at end of file diff --git a/TODO b/TODO index 948fb99f..9a428f98 100644 --- a/TODO +++ b/TODO @@ -66,6 +66,9 @@ Insert/Extract * Support for ILDG +* Support different boundary conditions (finite temp, chem. potential ... ) + +* Support different fermion representations? Actions -- coherent framework for implementing actions and their forces. diff --git a/configure b/configure index 9713ae55..2d785792 100755 --- a/configure +++ b/configure @@ -2574,7 +2574,7 @@ test -n "$target_alias" && NONENONEs,x,x, && program_prefix=${target_alias}- -am__api_version='1.15' +am__api_version='1.14' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or @@ -2746,8 +2746,8 @@ test "$program_suffix" != NONE && ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` -# Expand $ac_aux_dir to an absolute path. 
-am_aux_dir=`cd "$ac_aux_dir" && pwd` +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` if test x"${MISSING+set}" != xset; then case $am_aux_dir in @@ -2766,7 +2766,7 @@ else $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi -if test x"${install_sh+set}" != xset; then +if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; @@ -3094,8 +3094,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # mkdir_p='$(MKDIR_P)' -# We need awk for the "check" target (and possibly the TAP driver). The -# system "awk" is bad on some platforms. +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. # Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AMTAR='$${TAR-tar}' @@ -3154,7 +3154,6 @@ END fi - ac_config_headers="$ac_config_headers lib/Grid_config.h" # Check whether --enable-silent-rules was given. diff --git a/docs/doxy.cfg.test b/docs/doxy.cfg.test deleted file mode 100644 index c4b82094..00000000 --- a/docs/doxy.cfg.test +++ /dev/null @@ -1,2305 +0,0 @@ -# Doxyfile 1.8.6 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). 
- -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all text -# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv -# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv -# for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = "Grid" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = 1.0 - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify an logo or icon that is included in -# the documentation. The maximum height of the logo should not exceed 55 pixels -# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo -# to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. 
If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = ./doxy-en/ - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. 
-# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. 
-# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. 
Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a -# new page for each member. If set to NO, the documentation of a member will be -# part of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines. - -ALIASES = - -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. 
- -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make -# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C -# (default is Fortran), use: inc=Fortran f=C. -# -# Note For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See http://daringfireball.net/projects/markdown/ for details. 
-# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by by putting a % sign in front of the word -# or globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = YES - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. 
If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. 
So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. 
- -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = YES - -# If the EXTRACT_STATIC tag is set to YES all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. When set to YES local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO only methods in the interface are -# included. -# The default value is: NO. - -EXTRACT_LOCAL_METHODS = YES - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. 
- -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO these classes will be included in the various overviews. This option has -# no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO these declarations will be -# included in the documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. -# The default value is: system dependent. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES the -# scope will be hidden. -# The default value is: NO. 
- -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. 
-# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = YES - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. - -SORT_BY_SCOPE_NAME = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the -# todo list. This list is created by putting \todo commands in the -# documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the -# test list. This list is created by putting \test commands in the -# documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug -# list. 
This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if ... \endif and \cond -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES the list -# will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. 
- -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. Do not use file names with spaces, bibtex cannot handle them. See -# also \cite for info how to create references. 
- -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. - -WARN_IF_UNDOCUMENTED = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO doxygen will only warn about wrong or incomplete parameter -# documentation, but not about the absence of documentation. -# The default value is: NO. - -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. 
Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. -# Note: If this tag is empty the current directory is searched. - -INPUT = ../lib \ - ../tests \ - ../benchmarks - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: http://www.gnu.org/software/libiconv) for the list of -# possible encodings. -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank the -# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, -# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, -# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, -# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, -# *.qsf, *.as and *.js. 
- -FILE_PATTERNS = - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). 
- -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# -# -# where is the value of the INPUT_FILTER tag, and is the -# name of an input file. Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. 
- -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER ) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. - -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = YES - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. 
-# The default value is: YES. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# function all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES, then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see http://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. -# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the config file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. 
in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = YES - -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
- -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file that includes any scripts and style sheets -# that doxygen needs, which is dependent on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. -# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- -# defined cascading style sheet that is included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefor more robust against future updates. -# Doxygen will copy the style sheet file to the output directory. For an example -# see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. 
Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the stylesheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# http://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. -# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. 
Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: http://developer.apple.com/tools/xcode/), introduced with -# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html -# for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. 
Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler ( hhc.exe). If non-empty -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated ( -# YES) or a normal table of contents ( NO) in the .chm file. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
- -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. 
For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. 
- -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = YES - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. -# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH = 250 - -# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# Use the FORMULA_TRANPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# http://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using prerendered bitmaps. Use this if you do not have LaTeX -# installed or if you want to formulas look prettier in the HTML output. 
When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -USE_MATHJAX = YES - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from http://www.mathjax.org before deployment. -# The default value is: http://cdn.mathjax.org/mathjax/latest. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. 
See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use + S -# (what the is depends on the OS and browser, but it is typically -# , /