From b5af3fbe45c7270baec80d4309362edcdc84237d Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 19 May 2015 13:36:03 +0900 Subject: [PATCH 1/3] Merging with upstream --- README.md | 2 ++ lib/simd/Grid_vector_types.h | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 80714d76..e18ca474 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,8 @@ MPI, OpenMP, and SIMD parallelism are present in the library. by setting variables in the command line or in the environment. Here are examples: + ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4 + ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1 ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 14b2ddf0..360e9f1b 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -37,7 +37,6 @@ namespace Grid { /* @brief Grid_simd class for the SIMD vector type operations - */ template < class Scalar_type, class Vector_type > class Grid_simd { @@ -100,14 +99,9 @@ namespace Grid { template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } - - // do not compile if real or integer, send an error message from the compiler template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} - - - //////////////////////////////////// // Arithmetic operator overloads +,-,* From 4cadf11d1d4f57f28f564d3d779e82353c8c8d38 Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 19 May 2015 13:54:55 +0900 Subject: [PATCH 2/3] Added check of mpfr and gmp at configure time It generates automatically the linker flags or complains if not found. --- README | 2 + configure | 104 +++++++++++++++++++++++++ configure.ac | 16 +++- lib/Grid_config.h | 121 ----------------------------- lib/Grid_config.h.in | 6 ++ lib/Makefile.am | 3 +- lib/algorithms/approx/.dirstamp | 0 lib/algorithms/approx/Remez.cc | 0 lib/algorithms/approx/Remez.h | 0 lib/algorithms/approx/Zolotarev.cc | 0 lib/algorithms/approx/Zolotarev.h | 0 lib/algorithms/approx/bigfloat.h | 0 lib/communicator/.dirstamp | 0 lib/qcd/.dirstamp | 0 lib/simd/Grid_sse4.cpp | 19 +++++ lib/simd/Grid_vector_types.h | 8 +- lib/stamp-h1 | 1 - lib/stencil/.dirstamp | 0 18 files changed, 155 insertions(+), 125 deletions(-) delete mode 100644 lib/Grid_config.h delete mode 100644 lib/algorithms/approx/.dirstamp mode change 100755 => 100644 lib/algorithms/approx/Remez.cc mode change 100755 => 100644 lib/algorithms/approx/Remez.h mode change 100755 => 100644 lib/algorithms/approx/Zolotarev.cc mode change 100755 => 100644 lib/algorithms/approx/Zolotarev.h mode change 100755 => 100644 lib/algorithms/approx/bigfloat.h delete mode 100644 lib/communicator/.dirstamp delete mode 100644 lib/qcd/.dirstamp create mode 100644 lib/simd/Grid_sse4.cpp delete mode 100644 lib/stamp-h1 delete mode 100644 lib/stencil/.dirstamp diff --git a/README b/README index 41b66b6b..17e92fa0 100644 --- a/README +++ b/README @@ -33,6 +33,8 @@ MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is by setting variables in the command line or in the environment. Here is are examples: + ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4 + ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1 ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 diff --git a/configure b/configure index e7d9f32f..8c9e8c59 100755 --- a/configure +++ b/configure @@ -4502,6 +4502,11 @@ _ACEOF # Checks for library functions. +echo +echo Checking libraries +echo ::::::::::::::::::::::::::::::::::::::::::: + + for ac_func in gettimeofday do : ac_fn_cxx_check_func "$LINENO" "gettimeofday" "ac_cv_func_gettimeofday" @@ -4514,6 +4519,105 @@ fi done +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __gmpf_init in -lgmp" >&5 +$as_echo_n "checking for __gmpf_init in -lgmp... " >&6; } +if ${ac_cv_lib_gmp___gmpf_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgmp $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __gmpf_init (); +int +main () +{ +return __gmpf_init (); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ac_cv_lib_gmp___gmpf_init=yes +else + ac_cv_lib_gmp___gmpf_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gmp___gmpf_init" >&5 +$as_echo "$ac_cv_lib_gmp___gmpf_init" >&6; } +if test "x$ac_cv_lib_gmp___gmpf_init" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBGMP 1 +_ACEOF + + LIBS="-lgmp $LIBS" + +else + as_fn_error $? "GNU Multiple Precision GMP library was not found in your system. +Please install or provide the correct path to your installation +Info at: http://www.gmplib.org" "$LINENO" 5 +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpfr_init in -lmpfr" >&5 +$as_echo_n "checking for mpfr_init in -lmpfr... " >&6; } +if ${ac_cv_lib_mpfr_mpfr_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmpfr $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char mpfr_init (); +int +main () +{ +return mpfr_init (); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + ac_cv_lib_mpfr_mpfr_init=yes +else + ac_cv_lib_mpfr_mpfr_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpfr_mpfr_init" >&5 +$as_echo "$ac_cv_lib_mpfr_mpfr_init" >&6; } +if test "x$ac_cv_lib_mpfr_mpfr_init" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBMPFR 1 +_ACEOF + + LIBS="-lmpfr $LIBS" + +else + as_fn_error $? "GNU Multiple Precision MPFR library was not found in your system. +Please install or provide the correct path to your installation +Info at: http://www.mpfr.org/" "$LINENO" 5 +fi + diff --git a/configure.ac b/configure.ac index 14170f4e..93e2b574 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Project Grid package # -# Time-stamp: <2015-05-18 17:14:20 neo> +# Time-stamp: <2015-05-19 13:51:08 neo> AC_PREREQ([2.69]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) @@ -46,8 +46,22 @@ AC_TYPE_UINT32_T AC_TYPE_UINT64_T # Checks for library functions. +echo +echo Checking libraries +echo ::::::::::::::::::::::::::::::::::::::::::: + + AC_CHECK_FUNCS([gettimeofday]) +AC_CHECK_LIB([gmp],[__gmpf_init],, + [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system. +Please install or provide the correct path to your installation +Info at: http://www.gmplib.org)]) + +AC_CHECK_LIB([mpfr],[mpfr_init],, + [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system. +Please install or provide the correct path to your installation +Info at: http://www.mpfr.org/)]) diff --git a/lib/Grid_config.h b/lib/Grid_config.h deleted file mode 100644 index e1674850..00000000 --- a/lib/Grid_config.h +++ /dev/null @@ -1,121 +0,0 @@ -/* lib/Grid_config.h. Generated from Grid_config.h.in by configure. */ -/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */ - -/* AVX */ -/* #undef AVX1 */ - -/* AVX2 */ -/* #undef AVX2 */ - -/* AVX512 */ -/* #undef AVX512 */ - -/* GRID_COMMS_MPI */ -/* #undef GRID_COMMS_MPI */ - -/* GRID_COMMS_NONE */ -#define GRID_COMMS_NONE 1 - -/* Define to 1 if you have the declaration of `be64toh', and to 0 if you - don't. */ -#define HAVE_DECL_BE64TOH 1 - -/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't. - */ -#define HAVE_DECL_NTOHLL 0 - -/* Define to 1 if you have the header file. */ -#define HAVE_ENDIAN_H 1 - -/* Define to 1 if you have the `gettimeofday' function. */ -#define HAVE_GETTIMEOFDAY 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_GMP_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MALLOC_H 1 - -/* Define to 1 if you have the header file. */ -/* #undef HAVE_MALLOC_MALLOC_H */ - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MM_MALLOC_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Name of package */ -#define PACKAGE "grid" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "Grid" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "Grid 1.0" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "grid" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "1.0" - -/* SSE4 */ -#define SSE4 1 - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Version number of package */ -#define VERSION "1.0" - -/* Define for Solaris 2.5.1 so the uint32_t typedef from , - , or is not used. If the typedef were allowed, the - #define below would cause a syntax error. */ -/* #undef _UINT32_T */ - -/* Define for Solaris 2.5.1 so the uint64_t typedef from , - , or is not used. If the typedef were allowed, the - #define below would cause a syntax error. */ -/* #undef _UINT64_T */ - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ - -/* Define to the type of an unsigned integer type of width exactly 32 bits if - such a type exists and the standard includes do not define it. */ -/* #undef uint32_t */ - -/* Define to the type of an unsigned integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef uint64_t */ diff --git a/lib/Grid_config.h.in b/lib/Grid_config.h.in index 0ce09cf5..b7f56d5b 100644 --- a/lib/Grid_config.h.in +++ b/lib/Grid_config.h.in @@ -35,6 +35,12 @@ /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H +/* Define to 1 if you have the `gmp' library (-lgmp). */ +#undef HAVE_LIBGMP + +/* Define to 1 if you have the `mpfr' library (-lmpfr). */ +#undef HAVE_LIBMPFR + /* Define to 1 if you have the header file. */ #undef HAVE_MALLOC_H diff --git a/lib/Makefile.am b/lib/Makefile.am index 938f7ca1..557295d5 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -95,5 +95,6 @@ nobase_include_HEADERS = algorithms/approx/bigfloat.h\ simd/Grid_vComplexF.h\ simd/Grid_vInteger.h\ simd/Grid_vRealD.h\ - simd/Grid_vRealF.h + simd/Grid_vRealF.h\ + simd/Grid_vector_types.h diff --git a/lib/algorithms/approx/.dirstamp b/lib/algorithms/approx/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/algorithms/approx/Remez.cc b/lib/algorithms/approx/Remez.cc old mode 100755 new mode 100644 diff --git a/lib/algorithms/approx/Remez.h b/lib/algorithms/approx/Remez.h old mode 100755 new mode 100644 diff --git a/lib/algorithms/approx/Zolotarev.cc b/lib/algorithms/approx/Zolotarev.cc old mode 100755 new mode 100644 diff --git a/lib/algorithms/approx/Zolotarev.h b/lib/algorithms/approx/Zolotarev.h old mode 100755 new mode 100644 diff --git a/lib/algorithms/approx/bigfloat.h b/lib/algorithms/approx/bigfloat.h old mode 100755 new mode 100644 diff --git a/lib/communicator/.dirstamp b/lib/communicator/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/qcd/.dirstamp b/lib/qcd/.dirstamp deleted file mode 100644 index e69de29b..00000000 diff --git a/lib/simd/Grid_sse4.cpp b/lib/simd/Grid_sse4.cpp new file mode 100644 index 00000000..b0be164b --- /dev/null +++ b/lib/simd/Grid_sse4.cpp @@ -0,0 +1,19 @@ +//---------------------------------------------------------------------- +/*! @file Grid_vector_types.h + @brief Defines templated class to deal with inner vector types +*/ +// Time-stamp: <2015-05-19 13:53:47 neo> +//---------------------------------------------------------------------- + +namespace Optimization { + + + + +} + +// Here assign types +namespace Grid { + + +} diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 360e9f1b..7c2b1bed 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -1,3 +1,9 @@ +//---------------------------------------------------------------------- +/*! @file Grid_vector_types.h + @brief Defines templated class to deal with inner vector types +*/ +// Time-stamp: <2015-05-19 13:41:47 neo> +//---------------------------------------------------------------------- #ifndef GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES @@ -13,8 +19,8 @@ namespace Grid { struct RealPart< std::complex >{ typedef T type; }; - //////////////////////////////////////////////////////// + //////////////////////////////////////////////////////// // Check for complexity with type traits template struct is_complex : std::false_type {}; diff --git a/lib/stamp-h1 b/lib/stamp-h1 deleted file mode 100644 index 753f890f..00000000 --- a/lib/stamp-h1 +++ /dev/null @@ -1 +0,0 @@ -timestamp for lib/Grid_config.h diff --git a/lib/stencil/.dirstamp b/lib/stencil/.dirstamp deleted file mode 100644 index e69de29b..00000000 From b29caead322d1ead527c3f78316a4b67ed9fe17d Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 19 May 2015 17:21:17 +0900 Subject: [PATCH 3/3] Partial implementation of the vector types SIMD Implementing SSE4 now A systematic series of tests must be written. --- lib/Grid_config.h | 127 +++++++++++++++++++++ lib/Makefile.am | 158 +++++++++++++------------- lib/algorithms/approx/.dirstamp | 0 lib/communicator/.dirstamp | 0 lib/qcd/.dirstamp | 0 lib/simd/.dirstamp | 0 lib/simd/Grid_sse4.cpp | 19 ---- lib/simd/Grid_sse4.h | 194 ++++++++++++++++++++++++++++++++ lib/simd/Grid_vector_types.h | 125 ++++++++++++-------- lib/stamp-h1 | 1 + lib/stencil/.dirstamp | 0 tests/Grid_main.cc | 37 ++++++ 12 files changed, 516 insertions(+), 145 deletions(-) create mode 100644 lib/Grid_config.h create mode 100644 lib/algorithms/approx/.dirstamp create mode 100644 lib/communicator/.dirstamp create mode 100644 lib/qcd/.dirstamp create mode 100644 lib/simd/.dirstamp delete mode 100644 lib/simd/Grid_sse4.cpp create mode 100644 lib/simd/Grid_sse4.h create mode 100644 lib/stamp-h1 create mode 100644 lib/stencil/.dirstamp diff --git a/lib/Grid_config.h b/lib/Grid_config.h new file mode 100644 index 00000000..78582f3e --- /dev/null +++ b/lib/Grid_config.h @@ -0,0 +1,127 @@ +/* lib/Grid_config.h. Generated from Grid_config.h.in by configure. */ +/* lib/Grid_config.h.in. Generated from configure.ac by autoheader. */ + +/* AVX */ +/* #undef AVX1 */ + +/* AVX2 */ +/* #undef AVX2 */ + +/* AVX512 */ +/* #undef AVX512 */ + +/* GRID_COMMS_MPI */ +/* #undef GRID_COMMS_MPI */ + +/* GRID_COMMS_NONE */ +#define GRID_COMMS_NONE 1 + +/* Define to 1 if you have the declaration of `be64toh', and to 0 if you + don't. */ +#define HAVE_DECL_BE64TOH 1 + +/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't. + */ +#define HAVE_DECL_NTOHLL 0 + +/* Define to 1 if you have the header file. */ +#define HAVE_ENDIAN_H 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#define HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GMP_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `gmp' library (-lgmp). */ +#define HAVE_LIBGMP 1 + +/* Define to 1 if you have the `mpfr' library (-lmpfr). */ +#define HAVE_LIBMPFR 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MALLOC_MALLOC_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MM_MALLOC_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Name of package */ +#define PACKAGE "grid" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "Grid" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "Grid 1.0" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "grid" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.0" + +/* SSE4 */ +#define SSE4 1 + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "1.0" + +/* Define for Solaris 2.5.1 so the uint32_t typedef from , + , or is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT32_T */ + +/* Define for Solaris 2.5.1 so the uint64_t typedef from , + , or is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT64_T */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint32_t */ + +/* Define to the type of an unsigned integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint64_t */ diff --git a/lib/Makefile.am b/lib/Makefile.am index 557295d5..82459763 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -14,87 +14,89 @@ endif # Libraries # lib_LIBRARIES = libGrid.a -libGrid_a_SOURCES =\ - Grid_init.cc\ - stencil/Grid_stencil_common.cc\ - qcd/Grid_qcd_dirac.cc\ - qcd/Grid_qcd_wilson_dop.cc\ - algorithms/approx/Zolotarev.cc\ - algorithms/approx/Remez.cc\ +libGrid_a_SOURCES = \ + Grid_init.cc \ + stencil/Grid_stencil_common.cc \ + qcd/Grid_qcd_dirac.cc \ + qcd/Grid_qcd_wilson_dop.cc \ + algorithms/approx/Zolotarev.cc \ + algorithms/approx/Remez.cc \ $(extra_sources) # # Include files # -nobase_include_HEADERS = algorithms/approx/bigfloat.h\ - algorithms/approx/Chebyshev.h\ - algorithms/approx/Remez.h\ - algorithms/approx/Zolotarev.h\ - algorithms/iterative/ConjugateGradient.h\ - algorithms/iterative/NormalEquations.h\ - algorithms/iterative/SchurRedBlack.h\ - algorithms/LinearOperator.h\ - algorithms/SparseMatrix.h\ - cartesian/Grid_cartesian_base.h\ - cartesian/Grid_cartesian_full.h\ - cartesian/Grid_cartesian_red_black.h\ - communicator/Grid_communicator_base.h\ - cshift/Grid_cshift_common.h\ - cshift/Grid_cshift_mpi.h\ - cshift/Grid_cshift_none.h\ - Grid.h\ - Grid_algorithms.h\ - Grid_aligned_allocator.h\ - Grid_cartesian.h\ - Grid_communicator.h\ - Grid_comparison.h\ - Grid_cshift.h\ - Grid_extract.h\ - Grid_lattice.h\ - Grid_math.h\ - Grid_simd.h\ - Grid_stencil.h\ - Grid_threads.h\ - lattice/Grid_lattice_arith.h\ - lattice/Grid_lattice_base.h\ - lattice/Grid_lattice_comparison.h\ - lattice/Grid_lattice_conformable.h\ - lattice/Grid_lattice_coordinate.h\ - lattice/Grid_lattice_ET.h\ - lattice/Grid_lattice_local.h\ - lattice/Grid_lattice_overload.h\ - lattice/Grid_lattice_peekpoke.h\ - lattice/Grid_lattice_reality.h\ - lattice/Grid_lattice_reduction.h\ - lattice/Grid_lattice_rng.h\ - lattice/Grid_lattice_trace.h\ - lattice/Grid_lattice_transfer.h\ - lattice/Grid_lattice_transpose.h\ - lattice/Grid_lattice_where.h\ - math/Grid_math_arith.h\ - math/Grid_math_arith_add.h\ - math/Grid_math_arith_mac.h\ - math/Grid_math_arith_mul.h\ - math/Grid_math_arith_scalar.h\ - math/Grid_math_arith_sub.h\ - math/Grid_math_inner.h\ - math/Grid_math_outer.h\ - math/Grid_math_peek.h\ - math/Grid_math_poke.h\ - math/Grid_math_reality.h\ - math/Grid_math_tensors.h\ - math/Grid_math_trace.h\ - math/Grid_math_traits.h\ - math/Grid_math_transpose.h\ - parallelIO/GridNerscIO.h\ - qcd/Grid_qcd.h\ - qcd/Grid_qcd_2spinor.h\ - qcd/Grid_qcd_dirac.h\ - qcd/Grid_qcd_wilson_dop.h\ - simd/Grid_vComplexD.h\ - simd/Grid_vComplexF.h\ - simd/Grid_vInteger.h\ - simd/Grid_vRealD.h\ - simd/Grid_vRealF.h\ - simd/Grid_vector_types.h +nobase_include_HEADERS = algorithms/approx/bigfloat.h \ + algorithms/approx/Chebyshev.h \ + algorithms/approx/Remez.h \ + algorithms/approx/Zolotarev.h \ + algorithms/iterative/ConjugateGradient.h \ + algorithms/iterative/NormalEquations.h \ + algorithms/iterative/SchurRedBlack.h \ + algorithms/LinearOperator.h \ + algorithms/SparseMatrix.h \ + cartesian/Grid_cartesian_base.h \ + cartesian/Grid_cartesian_full.h \ + cartesian/Grid_cartesian_red_black.h \ + communicator/Grid_communicator_base.h \ + cshift/Grid_cshift_common.h \ + cshift/Grid_cshift_mpi.h \ + cshift/Grid_cshift_none.h \ + Grid.h \ + Grid_algorithms.h \ + Grid_aligned_allocator.h \ + Grid_cartesian.h \ + Grid_communicator.h \ + Grid_comparison.h \ + Grid_cshift.h \ + Grid_extract.h \ + Grid_lattice.h \ + Grid_math.h \ + Grid_simd.h \ + Grid_stencil.h \ + Grid_threads.h \ + lattice/Grid_lattice_arith.h \ + lattice/Grid_lattice_base.h \ + lattice/Grid_lattice_comparison.h \ + lattice/Grid_lattice_conformable.h \ + lattice/Grid_lattice_coordinate.h \ + lattice/Grid_lattice_ET.h \ + lattice/Grid_lattice_local.h \ + lattice/Grid_lattice_overload.h \ + lattice/Grid_lattice_peekpoke.h \ + lattice/Grid_lattice_reality.h \ + lattice/Grid_lattice_reduction.h \ + lattice/Grid_lattice_rng.h \ + lattice/Grid_lattice_trace.h \ + lattice/Grid_lattice_transfer.h \ + lattice/Grid_lattice_transpose.h \ + lattice/Grid_lattice_where.h \ + math/Grid_math_arith.h \ + math/Grid_math_arith_add.h \ + math/Grid_math_arith_mac.h \ + math/Grid_math_arith_mul.h \ + math/Grid_math_arith_scalar.h \ + math/Grid_math_arith_sub.h \ + math/Grid_math_inner.h \ + math/Grid_math_outer.h \ + math/Grid_math_peek.h \ + math/Grid_math_poke.h \ + math/Grid_math_reality.h \ + math/Grid_math_tensors.h \ + math/Grid_math_trace.h \ + math/Grid_math_traits.h \ + math/Grid_math_transpose.h \ + parallelIO/GridNerscIO.h \ + qcd/Grid_qcd.h \ + qcd/Grid_qcd_2spinor.h \ + qcd/Grid_qcd_dirac.h \ + qcd/Grid_qcd_wilson_dop.h \ + simd/Grid_vComplexD.h \ + simd/Grid_vComplexF.h \ + simd/Grid_vInteger.h \ + simd/Grid_vRealD.h \ + simd/Grid_vRealF.h \ + simd/Grid_vector_types.h \ + simd/Grid_sse4.h + diff --git a/lib/algorithms/approx/.dirstamp b/lib/algorithms/approx/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/communicator/.dirstamp b/lib/communicator/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/qcd/.dirstamp b/lib/qcd/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/simd/.dirstamp b/lib/simd/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/lib/simd/Grid_sse4.cpp b/lib/simd/Grid_sse4.cpp deleted file mode 100644 index b0be164b..00000000 --- a/lib/simd/Grid_sse4.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//---------------------------------------------------------------------- -/*! @file Grid_vector_types.h - @brief Defines templated class to deal with inner vector types -*/ -// Time-stamp: <2015-05-19 13:53:47 neo> -//---------------------------------------------------------------------- - -namespace Optimization { - - - - -} - -// Here assign types -namespace Grid { - - -} diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h new file mode 100644 index 00000000..ed4039b7 --- /dev/null +++ b/lib/simd/Grid_sse4.h @@ -0,0 +1,194 @@ +//---------------------------------------------------------------------- +/*! @file Grid_sse4.h + @brief Optimization libraries +*/ +// Time-stamp: <2015-05-19 17:06:51 neo> +//---------------------------------------------------------------------- + +#include + +namespace Optimization { + + struct Vsplat{ + //Complex float + inline __m128 operator()(float a, float b){ + return _mm_set_ps(b,a,b,a); + } + // Real float + inline __m128 operator()(float a){ + return _mm_set_ps(a,a,a,a); + } + //Complex double + inline __m128d operator()(double a, double b){ + return _mm_set_pd(b,a); + } + //Real double + inline __m128d operator()(double a){ + return _mm_set_pd(a,a); + } + //Integer + inline __m128i operator()(Integer a){ + return _mm_set1_epi32(a); + } + }; + + struct Vstore{ + //Float + inline void operator()(__m128 a, float* F){ + _mm_store_ps(F,a); + } + //Double + inline void operator()(__m128d a, double* D){ + _mm_store_pd(D,a); + } + //Integer + inline void operator()(__m128i a, Integer* I){ + _mm_store_si128((__m128i *)I,a); + } + + }; + + + + struct Vset{ + // Complex float + inline __m128 operator()(Grid::ComplexF *a){ + return _mm_set_ps(a[1].imag(), a[1].real(),a[0].imag(),a[0].real()); + } + // Complex double + inline __m128d operator()(Grid::ComplexD *a){ + return _mm_set_pd(a[0].imag(),a[0].real()); + } + // Real float + inline __m128 operator()(float *a){ + return _mm_set_ps(a[3],a[2],a[1],a[0]); + } + // Real double + inline __m128d operator()(double *a){ + return _mm_set_pd(a[1],a[0]); + } + // Integer + inline __m128i operator()(Integer *a){ + return _mm_set_epi32(a[0],a[1],a[2],a[3]); + } + + + }; + + struct Reduce{ + //Complex float + inline Grid::ComplexF operator()(__m128 in){ + union { + __m128 v1; + float f[4]; + } u128; + u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros + return Grid::ComplexF(u128.f[0], u128.f[1]); + } + //Complex double + inline Grid::ComplexD operator()(__m128d in){ + printf("Missing complex double implementation -> FIX\n"); + return Grid::ComplexD(0,0); // FIXME wrong + } + + + + }; + + + ///////////////////////////////////////////////////// + // Arithmetic operations + ///////////////////////////////////////////////////// + struct Sum{ + //Complex/Real float + inline __m128 operator()(__m128 a, __m128 b){ + return _mm_add_ps(a,b); + } + //Complex/Real double + inline __m128d operator()(__m128d a, __m128d b){ + return _mm_add_pd(a,b); + } + //Integer + inline __m128i operator()(__m128i a, __m128i b){ + return _mm_add_epi32(a,b); + } + }; + + struct Sub{ + //Complex/Real float + inline __m128 operator()(__m128 a, __m128 b){ + return _mm_sub_ps(a,b); + } + //Complex/Real double + inline __m128d operator()(__m128d a, __m128d b){ + return _mm_sub_pd(a,b); + } + //Integer + inline __m128i operator()(__m128i a, __m128i b){ + return _mm_sub_epi32(a,b); + } + }; + + struct MultComplex{ + // Complex float + inline __m128 operator()(__m128 a, __m128 b){ + __m128 ymm0,ymm1,ymm2; + ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar, + ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br + ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi + ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai + ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi + return _mm_addsub_ps(ymm0,ymm1); + } + // Complex double + inline __m128d operator()(__m128d a, __m128d b){ + __m128d ymm0,ymm1,ymm2; + ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, + ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br + ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01 + ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11 + ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi + return _mm_addsub_pd(ymm0,ymm1); + } + }; + + struct Mult{ + // Real float + inline __m128 operator()(__m128 a, __m128 b){ + return _mm_mul_ps(a,b); + } + // Real double + inline __m128d operator()(__m128d a, __m128d b){ + return _mm_mul_pd(a,b); + } + // Integer + inline __m128i operator()(__m128i a, __m128i b){ + return _mm_mul_epi32(a,b); + } + + }; + + + + +} + +// Here assign types +namespace Grid { + typedef __m128 SIMD_Ftype; // Single precision type + typedef __m128d SIMD_Dtype; // Double precision type + typedef __m128i SIMD_Itype; // Integer type + + + // Function names + typedef Optimization::Vsplat VsplatSIMD; + typedef Optimization::Vstore VstoreSIMD; + + // Arithmetic operations + typedef Optimization::Sum SumSIMD; + typedef Optimization::Sub SubSIMD; + typedef Optimization::Mult MultSIMD; + typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::Vset VsetSIMD; + +} diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 7c2b1bed..030a8a79 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -1,12 +1,14 @@ -//---------------------------------------------------------------------- +//--------------------------------------------------------------------------- /*! @file Grid_vector_types.h - @brief Defines templated class to deal with inner vector types + @brief Defines templated class Grid_simd to deal with inner vector types */ -// Time-stamp: <2015-05-19 13:41:47 neo> -//---------------------------------------------------------------------- +// Time-stamp: <2015-05-19 17:20:36 neo> +//--------------------------------------------------------------------------- #ifndef GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES +#include "Grid_sse4.h" + namespace Grid { @@ -27,18 +29,20 @@ namespace Grid { template < typename T > struct is_complex< std::complex >: std::true_type {}; //////////////////////////////////////////////////////// - - // Define the operation templates functors - template < class SIMD, class Operation > - SIMD binary(SIMD src_1, SIMD src_2, Operation op){ + // general forms to allow for vsplat syntax + // need explicit declaration of types when used since + // clang cannot automatically determine the output type sometimes + template < class Out, class Input1, class Input2, class Operation > + Out binary(Input1 src_1, Input2 src_2, Operation op){ return op(src_1, src_2); - } + } - template < class SIMD, class Operation > - SIMD unary(SIMD src, Operation op){ + template < class SIMDout, class Input, class Operation > + SIMDout unary(Input src, Operation op){ return op(src); - } + } + /////////////////////////////////////////////// /* @@ -74,36 +78,42 @@ namespace Grid { }; - /////////////////////////////////////////////// // mac, mult, sub, add, adj - // Should do an AVX2 version with mac. /////////////////////////////////////////////// friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); }; friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); } friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); } friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); } + //not for integer types... FIXME friend inline Grid_simd adj(const Grid_simd &in){ return conj(in); } - ////////////////////////////////// - // Initialise to 1,0,i - ////////////////////////////////// + /////////////////////////////////////////////// + // Initialise to 1,0,i for the correct types + /////////////////////////////////////////////// // if not complex overload here - friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } - friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } - + template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } + template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } + // overload for complex type template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > - friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } + friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > - friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); } - + friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor? + // For integral type template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > - friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } + friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > - friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } + friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } + template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} + template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + friend inline void vfalse(vInteger &ret){vsplat(ret,0);} + // do not compile if real or integer, send an error message from the compiler template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > @@ -114,31 +124,44 @@ namespace Grid { //////////////////////////////////// friend inline Grid_simd operator + (Grid_simd a, Grid_simd b) { - vComplexF ret; - // FIXME call the binary op + Grid_simd ret; + ret.v = binary(a.v, b.v, SumSIMD()); return ret; }; friend inline Grid_simd operator - (Grid_simd a, Grid_simd b) { - vComplexF ret; - // FIXME call the binary op + Grid_simd ret; + ret.v = binary(a.v, b.v, SubSIMD()); return ret; }; - friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) + // Distinguish between complex types and others + template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > + friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) { - vComplexF ret; - // FIXME call the binary op + Grid_simd ret; + ret.v = binary(a.v,b.v, MultComplexSIMD()); return ret; }; - + + // Real/Integer types + template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) + { + Grid_simd ret; + ret.v = binary(a.v,b.v, MultSIMD()); + return ret; + }; + + + //////////////////////////////////////////////////////////////////////// // FIXME: gonna remove these load/store, get, set, prefetch //////////////////////////////////////////////////////////////////////// friend inline void vset(Grid_simd &ret, Scalar_type *a){ - // FIXME set + ret.v = unary(a, VsetSIMD()); } /////////////////////// @@ -147,34 +170,33 @@ namespace Grid { // overload if complex template < class S = Scalar_type > friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){ - Real a= real(c); - Real b= imag(c); + Real a = real(c); + Real b = imag(c); vsplat(ret,a,b); } // this only for the complex version template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > friend inline void vsplat(Grid_simd &ret,Real a, Real b){ - // FIXME add operator + ret.v = binary(a, b, VsplatSIMD()); } - //if real fill with a, if complex fill with a in the real part + //if real fill with a, if complex fill with a in the real part (first function above) friend inline void vsplat(Grid_simd &ret,Real a){ - // FIXME add operator + ret.v = unary(a, VsplatSIMD()); } - friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ - //FIXME + binary(ret.v, (Real*)a, VstoreSIMD()); } + friend inline void vprefetch(const Grid_simd &v) { _mm_prefetch((const char*)&v.v,_MM_HINT_T0); } - friend inline Scalar_type Reduce(const Grid_simd & in) { // FIXME add operator @@ -221,6 +243,7 @@ namespace Grid { inline Grid_simd &operator *=(const Grid_simd &r) { *this = (*this)*r; return *this; + // return (*this)*r; ? } inline Grid_simd &operator +=(const Grid_simd &r) { *this = *this+r; @@ -233,6 +256,12 @@ namespace Grid { + friend inline void permute(Grid_simd &y,Grid_simd b,int perm) + { + Gpermute(y,b,perm); + } + + /* friend inline void permute(Grid_simd &y,Grid_simd b,int perm) { Gpermute(y,b,perm); @@ -253,7 +282,7 @@ namespace Grid { { Gextract(y,extracted); } - + */ };// end of Grid_simd class definition @@ -286,11 +315,11 @@ namespace Grid { // Define available types (now change names to avoid clashing) - typedef __m128 SIMD_type;// decided at compilation time - typedef Grid_simd< float , SIMD_type > MyRealF; - typedef Grid_simd< double , SIMD_type > MyRealD; - typedef Grid_simd< std::complex< float > , SIMD_type > MyComplexF; - typedef Grid_simd< std::complex< double >, SIMD_type > MyComplexD; + + typedef Grid_simd< float , SIMD_Ftype > MyRealF; + typedef Grid_simd< double , SIMD_Dtype > MyRealD; + typedef Grid_simd< std::complex< float > , SIMD_Ftype > MyComplexF; + typedef Grid_simd< std::complex< double >, SIMD_Dtype > MyComplexD; diff --git a/lib/stamp-h1 b/lib/stamp-h1 new file mode 100644 index 00000000..753f890f --- /dev/null +++ b/lib/stamp-h1 @@ -0,0 +1 @@ +timestamp for lib/Grid_config.h diff --git a/lib/stencil/.dirstamp b/lib/stencil/.dirstamp new file mode 100644 index 00000000..e69de29b diff --git a/tests/Grid_main.cc b/tests/Grid_main.cc index 45778922..17e36df9 100644 --- a/tests/Grid_main.cc +++ b/tests/Grid_main.cc @@ -1,5 +1,9 @@ #include "Grid.h" +//DEBUG +#include "simd/Grid_vector_types.h" + + using namespace std; using namespace Grid; using namespace Grid::QCD; @@ -151,6 +155,39 @@ int main (int argc, char ** argv) scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix + + +#ifdef SSE4 + ///////// Tests the new class Grid_simd + std::complex ctest(3.0,2.0); + std::complex ctestf(3.0,2.0); + MyComplexF TestMe1(1.0); // fill real part + MyComplexD TestMe2(ctest); + MyComplexD TestMe3(ctest);// compiler generate conversion of basic types + //MyRealF TestMe5(ctest);// Must generate compiler error + MyRealD TestMe4(2.0); + + MyComplexF TestMe6(ctestf); + MyComplexF TestMe7(ctestf); + + MyComplexD TheSum= TestMe2*TestMe3; + MyComplexF TheSumF= TestMe6*TestMe7; + + double dsum[2]; + _mm_store_pd(dsum, TheSum.v); + for (int i =0; i< 2; i++) + printf("%f\n", dsum[i]); + + float fsum[4]; + _mm_store_ps(fsum, TheSumF.v); + for (int i =0; i< 4; i++) + printf("%f\n", fsum[i]); + + vstore(TheSum, &ctest); + std::cout << ctest<< std::endl; +#endif + /////////////////////// + // Non-lattice (const objects) * Lattice ColourMatrix cm; SpinColourMatrix scm;