mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'release/v0.6.0' into feature/feynman-rules
This commit is contained in:
commit
d5e95bc350
112
README.md
112
README.md
@ -16,11 +16,27 @@
|
|||||||
|
|
||||||
**Data parallel C++ mathematical object library.**
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
Please send all pull requests to the `develop` branch.
|
|
||||||
|
|
||||||
License: GPL v2.
|
License: GPL v2.
|
||||||
|
|
||||||
Last update 2016/08/03.
|
Last update Nov 2016.
|
||||||
|
|
||||||
|
_Please send all pull requests to the `develop` branch._
|
||||||
|
|
||||||
|
### Bug report
|
||||||
|
|
||||||
|
_To help us track and resolve issues with Grid more efficiently, please report problems using the GitHub issue tracker rather than sending emails to Grid developers._
|
||||||
|
|
||||||
|
When you file an issue, please go through the following checklist:
|
||||||
|
|
||||||
|
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
|
||||||
|
2. Give a description of the target platform (CPU, network, compiler).
|
||||||
|
3. Give the exact `configure` command used.
|
||||||
|
4. Attach `config.log`.
|
||||||
|
5. Attach `config.summary`.
|
||||||
|
6. Attach the output of `make V=1`.
|
||||||
|
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Description
|
### Description
|
||||||
This library provides data parallel C++ container classes with internal memory layout
|
This library provides data parallel C++ container classes with internal memory layout
|
||||||
@ -42,7 +58,7 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
|||||||
for most programmers.
|
for most programmers.
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way).
|
Presently SSE4 (128 bit), AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way).
|
||||||
|
|
||||||
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
|
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
|
||||||
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
|
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
|
||||||
@ -50,7 +66,7 @@ The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `Compl
|
|||||||
MPI, OpenMP, and SIMD parallelism are present in the library.
|
MPI, OpenMP, and SIMD parallelism are present in the library.
|
||||||
Please see https://arxiv.org/abs/1512.03487 for more detail.
|
Please see https://arxiv.org/abs/1512.03487 for more detail.
|
||||||
|
|
||||||
### Installation
|
### Quick start
|
||||||
First, start by cloning the repository:
|
First, start by cloning the repository:
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
@ -71,12 +87,10 @@ mkdir build; cd build
|
|||||||
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||||
```
|
```
|
||||||
|
|
||||||
where `--enable-precision=` set the default precision (`single` or `double`),
|
where `--enable-precision=` sets the default precision,
|
||||||
`--enable-simd=` set the SIMD type (see possible values below), `--enable-
|
`--enable-simd=` sets the SIMD type, `--enable-
|
||||||
comms=` set the protocol used for communications (`none`, `mpi`, `mpi-auto` or
|
comms=` sets the communication interface, and `<path>` should be replaced by the prefix path where you want to
|
||||||
`shmem`), and `<path>` should be replaced by the prefix path where you want to
|
install Grid. Other options are detailed in the next section; you can also use `configure
|
||||||
install Grid. The `mpi-auto` communication option set `configure` to determine
|
|
||||||
automatically how to link to MPI. Other options are available, use `configure
|
|
||||||
--help` to display them. Like with any other program using GNU autotool, the
|
--help` to display them. Like with any other program using GNU autotool, the
|
||||||
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
||||||
customise the build.
|
customise the build.
|
||||||
@ -93,24 +107,86 @@ To minimise the build time, only the tests at the root of the `tests` directory
|
|||||||
make -C tests/<subdir> tests
|
make -C tests/<subdir> tests
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Build configuration options
|
||||||
|
|
||||||
|
- `--prefix=<path>`: installation prefix for Grid.
|
||||||
|
- `--with-gmp=<path>`: look for GMP in the UNIX prefix `<path>`
|
||||||
|
- `--with-mpfr=<path>`: look for MPFR in the UNIX prefix `<path>`
|
||||||
|
- `--with-fftw=<path>`: look for FFTW in the UNIX prefix `<path>`
|
||||||
|
- `--enable-lapack[=<path>]`: enable LAPACK support in Lanczos eigensolver. A UNIX prefix containing the library can be specified (optional).
|
||||||
|
- `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
|
||||||
|
- `--enable-numa`: enable first-touch NUMA optimisation.
|
||||||
|
- `--enable-simd=<code>`: set up Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
|
||||||
|
- `--enable-precision={single|double}`: set the default precision (default: `double`).
|
||||||
|
- `--enable-comms=<comm>`: use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below.
|
||||||
|
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48`).
|
||||||
|
- `--disable-timers`: disable system dependent high-resolution timers.
|
||||||
|
- `--enable-chroma`: enable Chroma regression tests.
|
||||||
|
|
||||||
|
### Possible communication interfaces
|
||||||
|
|
||||||
|
The following options can be used with the `--enable-comms=` option to target different communication interfaces:
|
||||||
|
|
||||||
|
| `<comm>` | Description |
|
||||||
|
| ------------- | -------------------------------------------- |
|
||||||
|
| `none` | no communications |
|
||||||
|
| `mpi[-auto]` | MPI communications |
|
||||||
|
| `mpi3[-auto]` | MPI communications using MPI 3 shared memory |
|
||||||
|
| `shmem` | Cray SHMEM communications |
|
||||||
|
|
||||||
|
For `mpi` and `mpi3` the optional `-auto` suffix instructs the `configure` script to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified, `configure` will scan through a list of default names).
|
||||||
|
|
||||||
### Possible SIMD types
|
### Possible SIMD types
|
||||||
|
|
||||||
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
|
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
|
||||||
|
|
||||||
| String | Description |
|
| `<code>` | Description |
|
||||||
| ----------- | -------------------------------------- |
|
| ----------- | -------------------------------------- |
|
||||||
| `GEN` | generic portable vector code |
|
| `GEN` | generic portable vector code |
|
||||||
| `SSE4` | SSE 4.2 (128 bit) |
|
| `SSE4` | SSE 4.2 (128 bit) |
|
||||||
| `AVX` | AVX (256 bit) |
|
| `AVX` | AVX (256 bit) |
|
||||||
| `AVXFMA4` | AVX (256 bit) + FMA |
|
| `AVXFMA` | AVX (256 bit) + FMA |
|
||||||
|
| `AVXFMA4` | AVX (256 bit) + FMA4 |
|
||||||
| `AVX2` | AVX 2 (256 bit) |
|
| `AVX2` | AVX 2 (256 bit) |
|
||||||
| `AVX512` | AVX 512 bit |
|
| `AVX512` | AVX 512 bit |
|
||||||
| `AVX512MIC` | AVX 512 bit for Intel MIC architecture |
|
| `QPX` | QPX (256 bit) |
|
||||||
| `ICMI` | Intel ICMI instructions (512 bit) |
|
|
||||||
|
|
||||||
Alternatively, some CPU codenames can be directly used:
|
Alternatively, some CPU codenames can be directly used:
|
||||||
|
|
||||||
| String | Description |
|
| `<code>` | Description |
|
||||||
| ----------- | -------------------------------------- |
|
| ----------- | -------------------------------------- |
|
||||||
| `KNC` | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
|
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
|
||||||
| `KNL` | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
|
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
|
||||||
|
| `BGQ` | Blue Gene/Q |
|
||||||
|
|
||||||
|
#### Notes:
|
||||||
|
- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions.
|
||||||
|
- For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform.
|
||||||
|
- BG/Q performance is currently rather poor. This is being investigated for future versions.
|
||||||
|
|
||||||
|
### Build setup for Intel Knights Landing platform
|
||||||
|
|
||||||
|
The following configuration is recommended for the Intel Knights Landing platform:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
../configure --enable-precision=double\
|
||||||
|
--enable-simd=KNL \
|
||||||
|
--enable-comms=mpi3-auto \
|
||||||
|
--with-gmp=<path> \
|
||||||
|
--with-mpfr=<path> \
|
||||||
|
--enable-mkl \
|
||||||
|
CXX=icpc MPICXX=mpiicpc
|
||||||
|
```
|
||||||
|
|
||||||
|
where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
../configure --enable-precision=double\
|
||||||
|
--enable-simd=KNL \
|
||||||
|
--enable-comms=mpi3 \
|
||||||
|
--with-gmp=<path> \
|
||||||
|
--with-mpfr=<path> \
|
||||||
|
--enable-mkl \
|
||||||
|
CXX=CC CC=cc
|
||||||
|
```
|
||||||
|
|
||||||
|
4
VERSION
4
VERSION
@ -1,4 +1,6 @@
|
|||||||
Version : 0.5.0
|
Version : 0.6.0
|
||||||
|
|
||||||
- AVX512, AVX2, AVX, SSE good
|
- AVX512, AVX2, AVX, SSE good
|
||||||
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
|
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
|
||||||
|
- MPI and MPI3
|
||||||
|
- HiRep, Smearing, Generic gauge group
|
||||||
|
266
configure.ac
266
configure.ac
@ -1,5 +1,5 @@
|
|||||||
AC_PREREQ([2.63])
|
AC_PREREQ([2.63])
|
||||||
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
|
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
|
||||||
AC_CANONICAL_BUILD
|
AC_CANONICAL_BUILD
|
||||||
AC_CANONICAL_HOST
|
AC_CANONICAL_HOST
|
||||||
AC_CANONICAL_TARGET
|
AC_CANONICAL_TARGET
|
||||||
@ -9,22 +9,33 @@ AC_CONFIG_SRCDIR([lib/Grid.h])
|
|||||||
AC_CONFIG_HEADERS([lib/Config.h])
|
AC_CONFIG_HEADERS([lib/Config.h])
|
||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
|
|
||||||
############### Checks for programs
|
############### Checks for programs
|
||||||
AC_LANG(C++)
|
|
||||||
CXXFLAGS="-O3 $CXXFLAGS"
|
CXXFLAGS="-O3 $CXXFLAGS"
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
|
|
||||||
############ openmp ###############
|
############### Get compiler informations
|
||||||
|
AC_LANG([C++])
|
||||||
|
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
||||||
|
AX_COMPILER_VENDOR
|
||||||
|
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
|
||||||
|
[vendor of C++ compiler that will compile the code])
|
||||||
|
AX_GXX_VERSION
|
||||||
|
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
||||||
|
[version of g++ that will compile the code])
|
||||||
|
|
||||||
|
############### Checks for typedefs, structures, and compiler characteristics
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
AC_TYPE_UINT32_T
|
||||||
|
AC_TYPE_UINT64_T
|
||||||
|
|
||||||
|
############### OpenMP
|
||||||
AC_OPENMP
|
AC_OPENMP
|
||||||
|
|
||||||
ac_openmp=no
|
ac_openmp=no
|
||||||
|
|
||||||
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
||||||
ac_openmp=yes
|
ac_openmp=yes
|
||||||
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
||||||
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
|
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
############### Checks for header files
|
############### Checks for header files
|
||||||
@ -37,12 +48,7 @@ AC_CHECK_HEADERS(execinfo.h)
|
|||||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
||||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
||||||
|
|
||||||
############### Checks for typedefs, structures, and compiler characteristics
|
############### GMP and MPFR
|
||||||
AC_TYPE_SIZE_T
|
|
||||||
AC_TYPE_UINT32_T
|
|
||||||
AC_TYPE_UINT64_T
|
|
||||||
|
|
||||||
############### GMP and MPFR #################
|
|
||||||
AC_ARG_WITH([gmp],
|
AC_ARG_WITH([gmp],
|
||||||
[AS_HELP_STRING([--with-gmp=prefix],
|
[AS_HELP_STRING([--with-gmp=prefix],
|
||||||
[try this for a non-standard install prefix of the GMP library])],
|
[try this for a non-standard install prefix of the GMP library])],
|
||||||
@ -54,10 +60,17 @@ AC_ARG_WITH([mpfr],
|
|||||||
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
||||||
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
################## lapack ####################
|
############### FFTW3
|
||||||
|
AC_ARG_WITH([fftw],
|
||||||
|
[AS_HELP_STRING([--with-fftw=prefix],
|
||||||
|
[try this for a non-standard install prefix of the FFTW3 library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
|
############### lapack
|
||||||
AC_ARG_ENABLE([lapack],
|
AC_ARG_ENABLE([lapack],
|
||||||
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||||
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
|
||||||
|
|
||||||
case ${ac_LAPACK} in
|
case ${ac_LAPACK} in
|
||||||
no)
|
no)
|
||||||
@ -67,10 +80,26 @@ case ${ac_LAPACK} in
|
|||||||
*)
|
*)
|
||||||
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
|
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
|
||||||
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
|
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
|
||||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
################## first-touch ####################
|
############### MKL
|
||||||
|
AC_ARG_ENABLE([mkl],
|
||||||
|
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
|
||||||
|
[ac_MKL=${enable_mkl}], [ac_MKL=no])
|
||||||
|
|
||||||
|
case ${ac_MKL} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_MKL/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_MKL/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### first-touch
|
||||||
AC_ARG_ENABLE([numa],
|
AC_ARG_ENABLE([numa],
|
||||||
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||||
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
|
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
|
||||||
@ -84,56 +113,44 @@ case ${ac_NUMA} in
|
|||||||
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
################## FFTW3 ####################
|
|
||||||
AC_ARG_WITH([fftw],
|
|
||||||
[AS_HELP_STRING([--with-fftw=prefix],
|
|
||||||
[try this for a non-standard install prefix of the FFTW3 library])],
|
|
||||||
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
|
||||||
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
|
||||||
|
|
||||||
################ Get compiler informations
|
|
||||||
AC_LANG([C++])
|
|
||||||
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
|
||||||
AX_COMPILER_VENDOR
|
|
||||||
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
|
|
||||||
[vendor of C++ compiler that will compile the code])
|
|
||||||
AX_GXX_VERSION
|
|
||||||
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
|
||||||
[version of g++ that will compile the code])
|
|
||||||
|
|
||||||
############### Checks for library functions
|
############### Checks for library functions
|
||||||
CXXFLAGS_CPY=$CXXFLAGS
|
CXXFLAGS_CPY=$CXXFLAGS
|
||||||
LDFLAGS_CPY=$LDFLAGS
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
AC_CHECK_LIB([gmp],[__gmpf_init],
|
|
||||||
[AC_CHECK_LIB([mpfr],[mpfr_init],
|
if test "${ac_MKL}x" != "nox"; then
|
||||||
[AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])]
|
AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
|
||||||
[have_mpfr=true]
|
[AC_MSG_ERROR("MKL enabled but library not found")])
|
||||||
[LIBS="$LIBS -lmpfr"],
|
fi
|
||||||
[AC_MSG_ERROR([MPFR library not found])])]
|
|
||||||
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
|
AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
||||||
[have_gmp=true]
|
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
|
||||||
[LIBS="$LIBS -lgmp"],
|
[AC_DEFINE([HAVE_LIBMPFR], [1],
|
||||||
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
|
[Define to 1 if you have the `MPFR' library])]
|
||||||
|
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
|
||||||
|
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
|
||||||
|
[have_gmp=true])
|
||||||
|
|
||||||
if test "${ac_LAPACK}x" != "nox"; then
|
if test "${ac_LAPACK}x" != "nox"; then
|
||||||
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
|
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
|
||||||
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
||||||
fi
|
fi
|
||||||
AC_CHECK_LIB([fftw3],[fftw_execute],
|
|
||||||
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
|
AC_SEARCH_LIBS([fftw_execute], [fftw3],
|
||||||
[have_fftw=true]
|
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
|
||||||
[LIBS="$LIBS -lfftw3 -lfftw3f"],
|
[AC_MSG_ERROR("single precision FFTW library not found")])]
|
||||||
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
|
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
|
||||||
|
[have_fftw=true])
|
||||||
|
|
||||||
CXXFLAGS=$CXXFLAGS_CPY
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
LDFLAGS=$LDFLAGS_CPY
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
|
|
||||||
############### SIMD instruction selection
|
############### SIMD instruction selection
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=<code>],
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
[select SIMD target (cf. README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN])
|
||||||
[ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
|
|
||||||
|
|
||||||
case ${ax_cv_cxx_compiler_vendor} in
|
case ${ax_cv_cxx_compiler_vendor} in
|
||||||
clang|gnu)
|
clang|gnu)
|
||||||
@ -153,12 +170,15 @@ case ${ax_cv_cxx_compiler_vendor} in
|
|||||||
AVX2)
|
AVX2)
|
||||||
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
SIMD_FLAGS='-mavx2 -mfma';;
|
SIMD_FLAGS='-mavx2 -mfma';;
|
||||||
AVX512|AVX512MIC|KNL)
|
AVX512)
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
||||||
IMCI|KNC)
|
KNC)
|
||||||
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
|
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
|
||||||
SIMD_FLAGS='';;
|
SIMD_FLAGS='';;
|
||||||
|
KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-march=knl';;
|
||||||
GEN)
|
GEN)
|
||||||
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
SIMD_FLAGS='';;
|
SIMD_FLAGS='';;
|
||||||
@ -176,9 +196,6 @@ case ${ax_cv_cxx_compiler_vendor} in
|
|||||||
AVX)
|
AVX)
|
||||||
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
SIMD_FLAGS='-mavx -xavx';;
|
SIMD_FLAGS='-mavx -xavx';;
|
||||||
AVXFMA4)
|
|
||||||
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
|
|
||||||
SIMD_FLAGS='-mavx -mfma';;
|
|
||||||
AVXFMA)
|
AVXFMA)
|
||||||
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4])
|
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4])
|
||||||
SIMD_FLAGS='-mavx -mfma';;
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
@ -188,12 +205,12 @@ case ${ax_cv_cxx_compiler_vendor} in
|
|||||||
AVX512)
|
AVX512)
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
SIMD_FLAGS='-xcore-avx512';;
|
SIMD_FLAGS='-xcore-avx512';;
|
||||||
AVX512MIC|KNL)
|
KNC)
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
|
|
||||||
SIMD_FLAGS='-xmic-avx512';;
|
|
||||||
IMCI|KNC)
|
|
||||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
|
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
|
||||||
SIMD_FLAGS='';;
|
SIMD_FLAGS='';;
|
||||||
|
KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
|
||||||
|
SIMD_FLAGS='-xmic-avx512';;
|
||||||
GEN)
|
GEN)
|
||||||
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
SIMD_FLAGS='';;
|
SIMD_FLAGS='';;
|
||||||
@ -208,14 +225,18 @@ AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
|
|||||||
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
|
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
|
||||||
|
|
||||||
case ${ac_SIMD} in
|
case ${ac_SIMD} in
|
||||||
AVX512|AVX512MIC|KNL)
|
AVX512|KNL)
|
||||||
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
|
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
|
||||||
*)
|
*)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
############### precision selection
|
############### Precision selection
|
||||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
AC_ARG_ENABLE([precision],
|
||||||
|
[AC_HELP_STRING([--enable-precision=single|double],
|
||||||
|
[Select default word size of Real])],
|
||||||
|
[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||||
|
|
||||||
case ${ac_PRECISION} in
|
case ${ac_PRECISION} in
|
||||||
single)
|
single)
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||||
@ -226,43 +247,49 @@ case ${ac_PRECISION} in
|
|||||||
esac
|
esac
|
||||||
|
|
||||||
############### communication type selection
|
############### communication type selection
|
||||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
|
||||||
|
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
||||||
|
|
||||||
case ${ac_COMMS} in
|
case ${ac_COMMS} in
|
||||||
none)
|
none)
|
||||||
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
||||||
;;
|
;;
|
||||||
mpi-auto)
|
mpi|mpi-auto)
|
||||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
||||||
LX_FIND_MPI
|
|
||||||
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
|
|
||||||
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
|
||||||
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
|
||||||
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
|
||||||
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS"
|
|
||||||
;;
|
;;
|
||||||
mpi)
|
mpi3|mpi3-auto)
|
||||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
|
||||||
;;
|
|
||||||
mpi3)
|
|
||||||
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
|
|
||||||
;;
|
;;
|
||||||
shmem)
|
shmem)
|
||||||
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
case ${ac_COMMS} in
|
||||||
|
*-auto)
|
||||||
|
LX_FIND_MPI
|
||||||
|
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
|
||||||
|
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
||||||
|
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
||||||
|
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS";;
|
||||||
|
*)
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
|
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
|
||||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
|
AM_CONDITIONAL(BUILD_COMMS_MPI,
|
||||||
AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] )
|
[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_MPI3,
|
||||||
|
[ test "X${ac_COMMS}X" == "Xmpi3X" || test "X${ac_COMMS}X" == "Xmpi3-autoX" ])
|
||||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
||||||
|
|
||||||
############### RNG selection
|
############### RNG selection
|
||||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
||||||
[Select Random Number Generator to be used])],\
|
[Select Random Number Generator to be used])],\
|
||||||
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
||||||
|
|
||||||
case ${ac_RNG} in
|
case ${ac_RNG} in
|
||||||
ranlux48)
|
ranlux48)
|
||||||
@ -276,10 +303,11 @@ case ${ac_RNG} in
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
############### timer option
|
############### Timer option
|
||||||
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
|
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
|
||||||
[Enable system dependent high res timers])],\
|
[Enable system dependent high res timers])],\
|
||||||
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
||||||
|
|
||||||
case ${ac_TIMERS} in
|
case ${ac_TIMERS} in
|
||||||
yes)
|
yes)
|
||||||
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
||||||
@ -293,7 +321,9 @@ case ${ac_TIMERS} in
|
|||||||
esac
|
esac
|
||||||
|
|
||||||
############### Chroma regression test
|
############### Chroma regression test
|
||||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],
|
||||||
|
[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
||||||
|
|
||||||
case ${ac_CHROMA} in
|
case ${ac_CHROMA} in
|
||||||
yes|no)
|
yes|no)
|
||||||
;;
|
;;
|
||||||
@ -301,6 +331,7 @@ case ${ac_CHROMA} in
|
|||||||
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
|
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
|
||||||
|
|
||||||
############### Doxygen
|
############### Doxygen
|
||||||
@ -332,35 +363,36 @@ AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
|||||||
AC_CONFIG_FILES(benchmarks/Makefile)
|
AC_CONFIG_FILES(benchmarks/Makefile)
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
||||||
echo "
|
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
Summary of configuration for $PACKAGE v$VERSION
|
Summary of configuration for $PACKAGE v$VERSION
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
----- PLATFORM ----------------------------------------
|
----- PLATFORM ----------------------------------------
|
||||||
- architecture (build) : $build_cpu
|
architecture (build) : $build_cpu
|
||||||
- os (build) : $build_os
|
os (build) : $build_os
|
||||||
- architecture (target) : $target_cpu
|
architecture (target) : $target_cpu
|
||||||
- os (target) : $target_os
|
os (target) : $target_os
|
||||||
- compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||||
- compiler version : ${ax_cv_gxx_version}
|
compiler version : ${ax_cv_gxx_version}
|
||||||
----- BUILD OPTIONS -----------------------------------
|
----- BUILD OPTIONS -----------------------------------
|
||||||
- SIMD : ${ac_SIMD}
|
SIMD : ${ac_SIMD}
|
||||||
- Threading : ${ac_openmp}
|
Threading : ${ac_openmp}
|
||||||
- Communications type : ${ac_COMMS}
|
Communications type : ${ac_COMMS}
|
||||||
- Default precision : ${ac_PRECISION}
|
Default precision : ${ac_PRECISION}
|
||||||
- RNG choice : ${ac_RNG}
|
RNG choice : ${ac_RNG}
|
||||||
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||||
- LAPACK : ${ac_LAPACK}
|
LAPACK : ${ac_LAPACK}
|
||||||
- FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||||
----- BUILD FLAGS -------------------------------------
|
----- BUILD FLAGS -------------------------------------
|
||||||
- CXXFLAGS:
|
CXXFLAGS:
|
||||||
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
- LDFLAGS:
|
LDFLAGS:
|
||||||
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
- LIBS:
|
LIBS:
|
||||||
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
-------------------------------------------------------
|
-------------------------------------------------------" > config.summary
|
||||||
"
|
echo ""
|
||||||
|
cat config.summary
|
||||||
|
echo ""
|
||||||
|
@ -30,7 +30,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#define _GRID_FFT_H_
|
#define _GRID_FFT_H_
|
||||||
|
|
||||||
#ifdef HAVE_FFTW
|
#ifdef HAVE_FFTW
|
||||||
#include <fftw3.h>
|
#include <Grid/fftw/fftw3.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,7 +31,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#include <string.h> //memset
|
#include <string.h> //memset
|
||||||
#ifdef USE_LAPACK
|
#ifdef USE_LAPACK
|
||||||
#include <lapacke.h>
|
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||||
|
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||||
|
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||||
|
double *work, int *lwork, int *iwork, int *liwork,
|
||||||
|
int *info);
|
||||||
#endif
|
#endif
|
||||||
#include "DenseMatrix.h"
|
#include "DenseMatrix.h"
|
||||||
#include "EigenSort.h"
|
#include "EigenSort.h"
|
||||||
|
@ -382,14 +382,9 @@ namespace Optimization {
|
|||||||
// Some Template specialization
|
// Some Template specialization
|
||||||
|
|
||||||
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
||||||
<<<<<<< HEAD
|
|
||||||
#define GNU_CLANG_COMPILER
|
|
||||||
#ifdef GNU_CLANG_COMPILER
|
|
||||||
=======
|
|
||||||
|
|
||||||
#ifndef __INTEL_COMPILER
|
#ifndef __INTEL_COMPILER
|
||||||
#warning "Slow reduction due to incomplete reduce intrinsics"
|
#warning "Slow reduction due to incomplete reduce intrinsics"
|
||||||
>>>>>>> develop
|
|
||||||
//Complex float Reduce
|
//Complex float Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||||
|
Loading…
Reference in New Issue
Block a user