From 93896ce59ede4bc2d4085f3b97c1b02647a8dc3a Mon Sep 17 00:00:00 2001 From: paboyle Date: Tue, 25 Oct 2016 06:12:49 +0100 Subject: [PATCH 1/7] Roll version number --- VERSION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index c12f9497..e7abbba7 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,6 @@ -Version : 0.5.0 +Version : 0.6.0 - AVX512, AVX2, AVX, SSE good - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above +- MPI and MPI3 +- HiRep, Smearing, Generic gauge group From 33d199a0ad3d1be0d583961911c2766d8f8874e4 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Tue, 25 Oct 2016 12:56:40 +0100 Subject: [PATCH 2/7] temporary thread safety in FFT --- lib/FFT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/FFT.h b/lib/FFT.h index 17060dc3..9a59ed01 100644 --- a/lib/FFT.h +++ b/lib/FFT.h @@ -227,7 +227,7 @@ namespace Grid { GridStopWatch timer; timer.Start(); -PARALLEL_FOR_LOOP +//PARALLEL_FOR_LOOP for(int idx=0;idx Date: Wed, 26 Oct 2016 18:50:07 +0100 Subject: [PATCH 3/7] debug message removed --- lib/communicator/Communicator_base.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 9810987d..8b789f4b 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -48,7 +48,6 @@ void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){ void *ptr = (void *)heap_top; heap_top += bytes; heap_bytes+= bytes; - std::cout <<"Shm alloc "< Date: Fri, 28 Oct 2016 09:13:09 +0100 Subject: [PATCH 4/7] avx512 build fix; detect clang/gcc intrinsics vs. 
ICPC --- lib/simd/Grid_avx512.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 62789462..521f804c 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -371,14 +371,9 @@ namespace Optimization { // Some Template specialization // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases -<<<<<<< HEAD -#define GNU_CLANG_COMPILER -#ifdef GNU_CLANG_COMPILER -======= #ifndef __INTEL_COMPILER #warning "Slow reduction due to incomplete reduce intrinsics" ->>>>>>> develop //Complex float Reduce template<> inline Grid::ComplexF Reduce::operator()(__m512 in){ From e74417ca12b4e416d151f06081fe70dd5f1e6651 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 31 Oct 2016 16:31:27 +0000 Subject: [PATCH 5/7] big build system polish --- configure.ac | 266 ++++++++++-------- lib/FFT.h | 2 +- .../iterative/ImplicitlyRestartedLanczos.h | 6 +- 3 files changed, 155 insertions(+), 119 deletions(-) diff --git a/configure.ac b/configure.ac index d6340a71..9fb35e78 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ AC_PREREQ([2.63]) -AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid]) +AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) AC_CANONICAL_BUILD AC_CANONICAL_HOST AC_CANONICAL_TARGET @@ -9,22 +9,33 @@ AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_HEADERS([lib/Config.h]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) - ############### Checks for programs -AC_LANG(C++) CXXFLAGS="-O3 $CXXFLAGS" AC_PROG_CXX AC_PROG_RANLIB -############ openmp ############### +############### Get compiler informations +AC_LANG([C++]) +AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) +AX_COMPILER_VENDOR +AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], + [vendor of C++ compiler that will compile the code]) +AX_GXX_VERSION +AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], + [version of g++ that will compile the code]) + 
+############### Checks for typedefs, structures, and compiler characteristics +AC_TYPE_SIZE_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T + +############### OpenMP AC_OPENMP - ac_openmp=no - if test "${OPENMP_CXXFLAGS}X" != "X"; then -ac_openmp=yes -AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" -AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS" + ac_openmp=yes + AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" + AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS" fi ############### Checks for header files @@ -37,12 +48,7 @@ AC_CHECK_HEADERS(execinfo.h) AC_CHECK_DECLS([ntohll],[], [], [[#include ]]) AC_CHECK_DECLS([be64toh],[], [], [[#include ]]) -############### Checks for typedefs, structures, and compiler characteristics -AC_TYPE_SIZE_T -AC_TYPE_UINT32_T -AC_TYPE_UINT64_T - -############### GMP and MPFR ################# +############### GMP and MPFR AC_ARG_WITH([gmp], [AS_HELP_STRING([--with-gmp=prefix], [try this for a non-standard install prefix of the GMP library])], @@ -54,10 +60,17 @@ AC_ARG_WITH([mpfr], [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) -################## lapack #################### +############### FFTW3 +AC_ARG_WITH([fftw], + [AS_HELP_STRING([--with-fftw=prefix], + [try this for a non-standard install prefix of the FFTW3 library])], + [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] + [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) + +############### lapack AC_ARG_ENABLE([lapack], [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], - [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) + [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) case ${ac_LAPACK} in no) @@ -67,10 +80,26 @@ case ${ac_LAPACK} in *) AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS" AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS" - AC_DEFINE([USE_LAPACK],[1],[use LAPACK]) + AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; esac -################## first-touch #################### +############### MKL +AC_ARG_ENABLE([mkl], + 
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], + [ac_MKL=${enable_mkl}], [ac_MKL=no]) + +case ${ac_MKL} in + no) + ;; + yes) + AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);; + *) + AM_CXXFLAGS="-I$ac_MKL/include $AM_CXXFLAGS" + AM_LDFLAGS="-L$ac_MKL/lib $AM_LDFLAGS" + AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);; +esac + +############### first-touch AC_ARG_ENABLE([numa], [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) @@ -84,56 +113,44 @@ case ${ac_NUMA} in AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);; esac -################## FFTW3 #################### -AC_ARG_WITH([fftw], - [AS_HELP_STRING([--with-fftw=prefix], - [try this for a non-standard install prefix of the FFTW3 library])], - [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] - [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) - -################ Get compiler informations -AC_LANG([C++]) -AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) -AX_COMPILER_VENDOR -AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], - [vendor of C++ compiler that will compile the code]) -AX_GXX_VERSION -AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], - [version of g++ that will compile the code]) - ############### Checks for library functions CXXFLAGS_CPY=$CXXFLAGS LDFLAGS_CPY=$LDFLAGS CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" LDFLAGS="$AM_LDFLAGS $LDFLAGS" + AC_CHECK_FUNCS([gettimeofday]) -AC_CHECK_LIB([gmp],[__gmpf_init], - [AC_CHECK_LIB([mpfr],[mpfr_init], - [AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])] - [have_mpfr=true] - [LIBS="$LIBS -lmpfr"], - [AC_MSG_ERROR([MPFR library not found])])] - [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])] - [have_gmp=true] - [LIBS="$LIBS -lgmp"], - [AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])]) + +if 
test "${ac_MKL}x" != "nox"; then + AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [], + [AC_MSG_ERROR("MKL enabled but library not found")]) +fi + +AC_SEARCH_LIBS([__gmpf_init], [gmp], + [AC_SEARCH_LIBS([mpfr_init], [mpfr], + [AC_DEFINE([HAVE_LIBMPFR], [1], + [Define to 1 if you have the `MPFR' library])] + [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] + [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] + [have_gmp=true]) if test "${ac_LAPACK}x" != "nox"; then - AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[], - [AC_MSG_ERROR("LAPACK enabled but library not found")]) -fi -AC_CHECK_LIB([fftw3],[fftw_execute], - [AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] - [have_fftw=true] - [LIBS="$LIBS -lfftw3 -lfftw3f"], - [AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])]) + AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], + [AC_MSG_ERROR("LAPACK enabled but library not found")]) +fi + +AC_SEARCH_LIBS([fftw_execute], [fftw3], + [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], + [AC_MSG_ERROR("single precision FFTW library not found")])] + [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] + [have_fftw=true]) + CXXFLAGS=$CXXFLAGS_CPY LDFLAGS=$LDFLAGS_CPY ############### SIMD instruction selection -AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ - [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ - [ac_SIMD=${enable_simd}],[ac_SIMD=GEN]) +AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=], + [select SIMD target (cf. 
README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN]) case ${ax_cv_cxx_compiler_vendor} in clang|gnu) @@ -153,12 +170,15 @@ case ${ax_cv_cxx_compiler_vendor} in AVX2) AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) SIMD_FLAGS='-mavx2 -mfma';; - AVX512|AVX512MIC|KNL) + AVX512) AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; - IMCI|KNC) + KNC) AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner]) SIMD_FLAGS='';; + KNL) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) + SIMD_FLAGS='-march=knl';; GEN) AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) SIMD_FLAGS='';; @@ -176,9 +196,6 @@ case ${ax_cv_cxx_compiler_vendor} in AVX) AC_DEFINE([AVX1],[1],[AVX intrinsics]) SIMD_FLAGS='-mavx -xavx';; - AVXFMA4) - AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) - SIMD_FLAGS='-mavx -mfma';; AVXFMA) AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4]) SIMD_FLAGS='-mavx -mfma';; @@ -188,12 +205,12 @@ case ${ax_cv_cxx_compiler_vendor} in AVX512) AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) SIMD_FLAGS='-xcore-avx512';; - AVX512MIC|KNL) - AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) - SIMD_FLAGS='-xmic-avx512';; - IMCI|KNC) + KNC) AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner]) SIMD_FLAGS='';; + KNL) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) + SIMD_FLAGS='-xmic-avx512';; GEN) AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) SIMD_FLAGS='';; @@ -208,14 +225,18 @@ AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS" AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS" case ${ac_SIMD} in - AVX512|AVX512MIC|KNL) + AVX512|KNL) AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);; *) ;; esac -############### precision selection -AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double]) +############### Precision selection +AC_ARG_ENABLE([precision], + [AC_HELP_STRING([--enable-precision=single|double], + [Select 
default word size of Real])], + [ac_PRECISION=${enable_precision}],[ac_PRECISION=double]) + case ${ac_PRECISION} in single) AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] ) @@ -226,43 +247,49 @@ case ${ac_PRECISION} in esac ############### communication type selection -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) +AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], + [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in none) - AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) + AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) ;; - mpi-auto) - AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) - LX_FIND_MPI - if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi - AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS" - AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS" - AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS" - LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS" + mpi|mpi-auto) + AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) ;; - mpi) - AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) - ;; - mpi3) - AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) + mpi3|mpi3-auto) + AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) ;; shmem) - AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) + AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) ;; *) - AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); + AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); ;; esac +case ${ac_COMMS} in + *-auto) + LX_FIND_MPI + if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi + AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS" + AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS" + AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS" + LIBS="`echo $MPI_CXXLDFLAGS | sed -E 
's/-L@<:@^ @:>@+//g'` $LIBS";; + *) + ;; +esac + AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ]) -AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ]) -AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] ) +AM_CONDITIONAL(BUILD_COMMS_MPI, + [ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ]) +AM_CONDITIONAL(BUILD_COMMS_MPI3, + [ test "X${ac_COMMS}X" == "Xmpi3X" || test "X${ac_COMMS}X" == "Xmpi3-autoX" ]) AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ]) ############### RNG selection AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\ - [Select Random Number Generator to be used])],\ - [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) + [Select Random Number Generator to be used])],\ + [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) case ${ac_RNG} in ranlux48) @@ -276,10 +303,11 @@ case ${ac_RNG} in ;; esac -############### timer option +############### Timer option AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\ - [Enable system dependent high res timers])],\ - [ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) + [Enable system dependent high res timers])],\ + [ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) + case ${ac_TIMERS} in yes) AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] ) @@ -293,7 +321,9 @@ case ${ac_TIMERS} in esac ############### Chroma regression test -AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) +AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma], + [Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) + case ${ac_CHROMA} in yes|no) ;; @@ -301,6 +331,7 @@ case ${ac_CHROMA} in AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); ;; esac + AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ]) ############### Doxygen @@ -332,35 +363,36 @@ AC_CONFIG_FILES(tests/qdpxx/Makefile) 
AC_CONFIG_FILES(benchmarks/Makefile) AC_OUTPUT -echo " -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Summary of configuration for $PACKAGE v$VERSION ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ----- PLATFORM ---------------------------------------- -- architecture (build) : $build_cpu -- os (build) : $build_os -- architecture (target) : $target_cpu -- os (target) : $target_os -- compiler vendor : ${ax_cv_cxx_compiler_vendor} -- compiler version : ${ax_cv_gxx_version} +architecture (build) : $build_cpu +os (build) : $build_os +architecture (target) : $target_cpu +os (target) : $target_os +compiler vendor : ${ax_cv_cxx_compiler_vendor} +compiler version : ${ax_cv_gxx_version} ----- BUILD OPTIONS ----------------------------------- -- SIMD : ${ac_SIMD} -- Threading : ${ac_openmp} -- Communications type : ${ac_COMMS} -- Default precision : ${ac_PRECISION} -- RNG choice : ${ac_RNG} -- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` -- LAPACK : ${ac_LAPACK} -- FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` -- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` -- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` +SIMD : ${ac_SIMD} +Threading : ${ac_openmp} +Communications type : ${ac_COMMS} +Default precision : ${ac_PRECISION} +RNG choice : ${ac_RNG} +GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` +LAPACK : ${ac_LAPACK} +FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` +build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` +graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` ----- BUILD FLAGS ------------------------------------- -- CXXFLAGS: +CXXFLAGS: `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'` -- LDFLAGS: +LDFLAGS: 
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'` -- LIBS: +LIBS: `echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'` -------------------------------------------------------- -" +-------------------------------------------------------" > config.summary +echo "" +cat config.summary +echo "" diff --git a/lib/FFT.h b/lib/FFT.h index 9a59ed01..8d359cba 100644 --- a/lib/FFT.h +++ b/lib/FFT.h @@ -30,7 +30,7 @@ Author: Peter Boyle #define _GRID_FFT_H_ #ifdef HAVE_FFTW -#include +#include #endif namespace Grid { diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 1a1a0a16..5d6deae0 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -31,7 +31,11 @@ Author: paboyle #include //memset #ifdef USE_LAPACK -#include +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); #endif #include "DenseMatrix.h" #include "EigenSort.h" From 66d832c7339444799452fd8d3510b1d46b88978c Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 31 Oct 2016 16:39:29 +0000 Subject: [PATCH 6/7] FFTW header fix --- lib/FFT.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/FFT.h b/lib/FFT.h index 8d359cba..f9f91ea6 100644 --- a/lib/FFT.h +++ b/lib/FFT.h @@ -30,7 +30,7 @@ Author: Peter Boyle #define _GRID_FFT_H_ #ifdef HAVE_FFTW -#include +#include #endif namespace Grid { From 07416e4567140655a7153914b8d5f9d69e51598f Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Mon, 31 Oct 2016 18:21:52 +0000 Subject: [PATCH 7/7] README update --- README.md | 112 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 2a0acad6..faf86faf 
100644 --- a/README.md +++ b/README.md @@ -16,11 +16,27 @@ **Data parallel C++ mathematical object library.** -Please send all pull requests to the `develop` branch. - License: GPL v2. -Last update 2016/08/03. +Last update Nov 2016. + +_Please send all pull requests to the `develop` branch._ + +### Bug report + +_To help us track and resolve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ + +When you file an issue, please go through the following checklist: + +1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number. +2. Give a description of the target platform (CPU, network, compiler). +3. Give the exact `configure` command used. +4. Attach `config.log`. +5. Attach `config.summary`. +6. Attach the output of `make V=1`. +7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. + + ### Description This library provides data parallel C++ container classes with internal memory layout @@ -42,7 +58,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi for most programmers. The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way). +Presently SSE4 (128 bit), AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. 
@@ -50,7 +66,7 @@ The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `Compl MPI, OpenMP, and SIMD parallelism are present in the library. Please see https://arxiv.org/abs/1512.03487 for more detail. -### Installation +### Quick start First, start by cloning the repository: ``` bash @@ -71,12 +87,10 @@ mkdir build; cd build ../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix= ``` -where `--enable-precision=` set the default precision (`single` or `double`), -`--enable-simd=` set the SIMD type (see possible values below), `--enable- -comms=` set the protocol used for communications (`none`, `mpi`, `mpi-auto` or -`shmem`), and `` should be replaced by the prefix path where you want to -install Grid. The `mpi-auto` communication option set `configure` to determine -automatically how to link to MPI. Other options are available, use `configure +where `--enable-precision=` set the default precision, +`--enable-simd=` set the SIMD type, `--enable- +comms=`, and `` should be replaced by the prefix path where you want to +install Grid. Other options are detailed in the next section, you can also use `configure --help` to display them. Like with any other program using GNU autotool, the `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to customise the build. @@ -93,24 +107,86 @@ To minimise the build time, only the tests at the root of the `tests` directory make -C tests/ tests ``` +### Build configuration options + +- `--prefix=`: installation prefix for Grid. +- `--with-gmp=`: look for GMP in the UNIX prefix `` +- `--with-mpfr=`: look for MPFR in the UNIX prefix `` +- `--with-fftw=`: look for FFTW in the UNIX prefix `` +- `--enable-lapack[=]`: enable LAPACK support in Lanczos eigensolver. A UNIX prefix containing the library can be specified (optional). +- `--enable-mkl[=]`: use Intel MKL for FFT (and LAPACK if enabled) routines. 
A UNIX prefix containing the library can be specified (optional). +- `--enable-numa`: enable first-touch NUMA optimisation. +- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below. +- `--enable-precision={single|double}`: set the default precision (default: `double`). +- `--enable-comms=<comm>`: use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below. +- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48`). +- `--disable-timers`: disable system dependent high-resolution timers. +- `--enable-chroma`: enable Chroma regression tests. + +### Possible communication interfaces + +The following options can be used with the `--enable-comms=` option to target different communication interfaces: + +| `<comm>` | Description | +| ------------- | -------------------------------------------- | +| `none` | no communications | +| `mpi[-auto]` | MPI communications | +| `mpi3[-auto]` | MPI communications using MPI 3 shared memory | +| `shmem` | Cray SHMEM communications | + +For `mpi` and `mpi3` the optional `-auto` suffix instructs the `configure` script to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified, `configure` will scan through a list of default names). 
+ ### Possible SIMD types The following options can be use with the `--enable-simd=` option to target different SIMD instruction sets: -| String | Description | +| `<code>` | Description | | ----------- | -------------------------------------- | | `GEN` | generic portable vector code | | `SSE4` | SSE 4.2 (128 bit) | | `AVX` | AVX (256 bit) | -| `AVXFMA4` | AVX (256 bit) + FMA | +| `AVXFMA` | AVX (256 bit) + FMA | +| `AVXFMA4` | AVX (256 bit) + FMA4 | | `AVX2` | AVX 2 (256 bit) | | `AVX512` | AVX 512 bit | -| `AVX512MIC` | AVX 512 bit for Intel MIC architecture | -| `ICMI` | Intel ICMI instructions (512 bit) | +| `QPX` | QPX (256 bit) | Alternatively, some CPU codenames can be directly used: -| String | Description | +| `<code>` | Description | | ----------- | -------------------------------------- | -| `KNC` | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | -| `KNL` | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | \ No newline at end of file +| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | +| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | +| `BGQ` | Blue Gene/Q | + +#### Notes: +- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions. +- For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform. +- BG/Q performance is currently rather poor. This is being investigated for future versions. 
+ +### Build setup for Intel Knights Landing platform + +The following configuration is recommended for the Intel Knights Landing platform: + +``` bash +../configure --enable-precision=double\ + --enable-simd=KNL \ + --enable-comms=mpi3-auto \ + --with-gmp=<path> \ + --with-mpfr=<path> \ + --enable-mkl \ + CXX=icpc MPICXX=mpiicpc +``` + +where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: + +``` bash +../configure --enable-precision=double\ + --enable-simd=KNL \ + --enable-comms=mpi3 \ + --with-gmp=<path> \ + --with-mpfr=<path> \ + --enable-mkl \ + CXX=CC CC=cc +``` +