mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
Merge branch 'release/v0.6.0'
This commit is contained in:
commit
c363bdd784
29
.gitignore
vendored
29
.gitignore
vendored
@ -5,7 +5,6 @@
|
|||||||
*.o
|
*.o
|
||||||
*.obj
|
*.obj
|
||||||
|
|
||||||
|
|
||||||
# Editor files #
|
# Editor files #
|
||||||
################
|
################
|
||||||
*~
|
*~
|
||||||
@ -48,6 +47,7 @@ Config.h.in
|
|||||||
config.log
|
config.log
|
||||||
config.status
|
config.status
|
||||||
.deps
|
.deps
|
||||||
|
*.inc
|
||||||
|
|
||||||
# http://www.gnu.org/software/autoconf #
|
# http://www.gnu.org/software/autoconf #
|
||||||
########################################
|
########################################
|
||||||
@ -63,19 +63,7 @@ config.sub
|
|||||||
config.guess
|
config.guess
|
||||||
INSTALL
|
INSTALL
|
||||||
.dirstamp
|
.dirstamp
|
||||||
|
ltmain.sh
|
||||||
# Packages #
|
|
||||||
############
|
|
||||||
# it's better to unpack these files and commit the raw source
|
|
||||||
# git has its own built in compression methods
|
|
||||||
*.7z
|
|
||||||
*.dmg
|
|
||||||
*.gz
|
|
||||||
*.iso
|
|
||||||
*.jar
|
|
||||||
*.rar
|
|
||||||
*.tar
|
|
||||||
*.zip
|
|
||||||
|
|
||||||
# Logs and databases #
|
# Logs and databases #
|
||||||
######################
|
######################
|
||||||
@ -101,3 +89,16 @@ build*/*
|
|||||||
#####################
|
#####################
|
||||||
*.xcodeproj/*
|
*.xcodeproj/*
|
||||||
build.sh
|
build.sh
|
||||||
|
|
||||||
|
# Eigen source #
|
||||||
|
################
|
||||||
|
lib/Eigen/*
|
||||||
|
|
||||||
|
# FFTW source #
|
||||||
|
################
|
||||||
|
lib/fftw/*
|
||||||
|
|
||||||
|
# libtool macros #
|
||||||
|
##################
|
||||||
|
m4/lt*
|
||||||
|
m4/libtool.m4
|
30
.travis.yml
30
.travis.yml
@ -9,10 +9,6 @@ matrix:
|
|||||||
- os: osx
|
- os: osx
|
||||||
osx_image: xcode7.2
|
osx_image: xcode7.2
|
||||||
compiler: clang
|
compiler: clang
|
||||||
- os: osx
|
|
||||||
osx_image: xcode7.2
|
|
||||||
compiler: gcc
|
|
||||||
env: VERSION=-5
|
|
||||||
- compiler: gcc
|
- compiler: gcc
|
||||||
addons:
|
addons:
|
||||||
apt:
|
apt:
|
||||||
@ -23,6 +19,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: VERSION=-4.9
|
env: VERSION=-4.9
|
||||||
- compiler: gcc
|
- compiler: gcc
|
||||||
@ -35,6 +33,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: VERSION=-5
|
env: VERSION=-5
|
||||||
- compiler: clang
|
- compiler: clang
|
||||||
@ -47,6 +47,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
- compiler: clang
|
- compiler: clang
|
||||||
@ -59,6 +61,8 @@ matrix:
|
|||||||
- libmpfr-dev
|
- libmpfr-dev
|
||||||
- libgmp-dev
|
- libgmp-dev
|
||||||
- libmpc-dev
|
- libmpc-dev
|
||||||
|
- libopenmpi-dev
|
||||||
|
- openmpi-bin
|
||||||
- binutils-dev
|
- binutils-dev
|
||||||
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||||
|
|
||||||
@ -69,6 +73,7 @@ before_install:
|
|||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
|
||||||
|
|
||||||
install:
|
install:
|
||||||
@ -82,13 +87,20 @@ install:
|
|||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- ./scripts/reconfigure_script
|
- ./bootstrap.sh
|
||||||
- mkdir build
|
- mkdir build
|
||||||
- cd build
|
- cd build
|
||||||
- ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
|
||||||
|
- make -j4
|
||||||
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
|
- echo make clean
|
||||||
|
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
||||||
- make -j4
|
- make -j4
|
||||||
- ./benchmarks/Benchmark_dwf --threads 1
|
- ./benchmarks/Benchmark_dwf --threads 1
|
||||||
- make clean
|
- echo make clean
|
||||||
- ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=double --enable-simd=SSE4 --enable-comms=none
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
|
||||||
|
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
|
||||||
- make -j4
|
- make -j4
|
||||||
- ./benchmarks/Benchmark_dwf --threads 1
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||||
|
|
||||||
|
11
Makefile.am
11
Makefile.am
@ -1,5 +1,10 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
# additional include paths necessary to compile the C++ library
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/
|
SUBDIRS = lib benchmarks tests
|
||||||
SUBDIRS = lib tests benchmarks
|
|
||||||
|
|
||||||
filelist: $(SUBDIRS)
|
.PHONY: tests
|
||||||
|
|
||||||
|
tests: all
|
||||||
|
$(MAKE) -C tests tests
|
||||||
|
|
||||||
|
AM_CXXFLAGS += -I$(top_builddir)/include
|
||||||
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
44
README
44
README
@ -1,44 +0,0 @@
|
|||||||
This library provides data parallel C++ container classes with internal memory layout
|
|
||||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
|
||||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
|
||||||
array indices to both MPI tasks and SIMD processing elements.
|
|
||||||
|
|
||||||
* Identically shaped arrays then be processed with perfect data parallelisation.
|
|
||||||
* Such identically shapped arrays are called conformable arrays.
|
|
||||||
|
|
||||||
The transformation is based on the observation that Cartesian array processing involves
|
|
||||||
identical processing to be performed on different regions of the Cartesian array.
|
|
||||||
|
|
||||||
The library will (eventually) both geometrically decompose into MPI tasks and across SIMD lanes.
|
|
||||||
|
|
||||||
Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but
|
|
||||||
optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification
|
|
||||||
for most programmers.
|
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
|
||||||
Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported.
|
|
||||||
|
|
||||||
These are presented as
|
|
||||||
|
|
||||||
vRealF, vRealD, vComplexF, vComplexD
|
|
||||||
|
|
||||||
internal vector data types. These may be useful in themselves for other programmers.
|
|
||||||
The corresponding scalar types are named
|
|
||||||
|
|
||||||
RealF, RealD, ComplexF, ComplexD
|
|
||||||
|
|
||||||
MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is present in the library.
|
|
||||||
|
|
||||||
You can give `configure' initial values for configuration parameters
|
|
||||||
by setting variables in the command line or in the environment. Here
|
|
||||||
is are examples:
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
|
||||||
|
|
||||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
|
||||||
|
|
||||||
|
|
189
README.md
189
README.md
@ -1,15 +1,51 @@
|
|||||||
# Grid [![Build Status](https://travis-ci.org/paboyle/Grid.svg?branch=master)](https://travis-ci.org/paboyle/Grid)
|
# Grid
|
||||||
Data parallel C++ mathematical object library
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td>Last stable release</td>
|
||||||
|
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||||
|
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Development branch</td>
|
||||||
|
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||||
|
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
Last update 2015/7/30
|
**Data parallel C++ mathematical object library.**
|
||||||
|
|
||||||
|
License: GPL v2.
|
||||||
|
|
||||||
|
Last update Nov 2016.
|
||||||
|
|
||||||
|
_Please do not send pull requests to the `master` branch which is reserved for releases._
|
||||||
|
|
||||||
|
### Bug report
|
||||||
|
|
||||||
|
_To help us track and solve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._
|
||||||
|
|
||||||
|
When you file an issue, please go through the following checklist:
|
||||||
|
|
||||||
|
1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number.
|
||||||
|
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler.
|
||||||
|
3. Give the exact `configure` command used.
|
||||||
|
4. Attach `config.log`.
|
||||||
|
5. Attach `config.summary`.
|
||||||
|
6. Attach the output of `make V=1`.
|
||||||
|
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Description
|
||||||
This library provides data parallel C++ container classes with internal memory layout
|
This library provides data parallel C++ container classes with internal memory layout
|
||||||
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
|
||||||
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
are provided, similar to HPF and cmfortran, and user control is given over the mapping of
|
||||||
array indices to both MPI tasks and SIMD processing elements.
|
array indices to both MPI tasks and SIMD processing elements.
|
||||||
|
|
||||||
* Identically shaped arrays can then be processed with perfect data parallelisation.
|
* Identically shaped arrays can then be processed with perfect data parallelisation.
|
||||||
* Such identically shapped arrays are called conformable arrays.
|
* Such identically shaped arrays are called conformable arrays.
|
||||||
|
|
||||||
The transformation is based on the observation that Cartesian array processing involves
|
The transformation is based on the observation that Cartesian array processing involves
|
||||||
identical processing to be performed on different regions of the Cartesian array.
|
identical processing to be performed on different regions of the Cartesian array.
|
||||||
@ -22,37 +58,136 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
|
|||||||
for most programmers.
|
for most programmers.
|
||||||
|
|
||||||
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
|
||||||
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way).
|
Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way).
|
||||||
|
|
||||||
These are presented as
|
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers.
|
||||||
|
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
|
||||||
vRealF, vRealD, vComplexF, vComplexD
|
|
||||||
|
|
||||||
internal vector data types. These may be useful in themselves for other programmers.
|
|
||||||
The corresponding scalar types are named
|
|
||||||
|
|
||||||
RealF, RealD, ComplexF, ComplexD
|
|
||||||
|
|
||||||
MPI, OpenMP, and SIMD parallelism are present in the library.
|
MPI, OpenMP, and SIMD parallelism are present in the library.
|
||||||
|
Please see https://arxiv.org/abs/1512.03487 for more detail.
|
||||||
|
|
||||||
You can give `configure' initial values for configuration parameters
|
### Quick start
|
||||||
by setting variables in the command line or in the environment. Here
|
First, start by cloning the repository:
|
||||||
are examples:
|
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
|
``` bash
|
||||||
|
git clone https://github.com/paboyle/Grid.git
|
||||||
|
```
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX
|
Then enter the cloned directory and set up the build system:
|
||||||
|
|
||||||
./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
|
``` bash
|
||||||
|
cd Grid
|
||||||
|
./bootstrap.sh
|
||||||
|
```
|
||||||
|
|
||||||
./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
|
Now you can execute the `configure` script to generate makefiles (here from a build directory):
|
||||||
|
|
||||||
Note: Before running configure it could be necessary to execute the script
|
|
||||||
|
|
||||||
script/filelist
|
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
mkdir build; cd build
|
||||||
|
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
|
||||||
|
```
|
||||||
|
|
||||||
|
where `--enable-precision=` sets the default precision,
|
||||||
For developers:
|
`--enable-simd=` sets the SIMD type, `--enable-
|
||||||
Use reconfigure_script in the scripts/ directory to create the autotools environment
|
comms=` sets the communication interface, and `<path>` should be replaced by the prefix path where you want to
|
||||||
|
install Grid. Other options are detailed in the next section; you can also use `configure
|
||||||
|
--help` to display them. As with any other program using GNU autotools, the
|
||||||
|
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
|
||||||
|
customise the build.
|
||||||
|
|
||||||
|
Finally, you can build and install Grid:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
make; make install
|
||||||
|
```
|
||||||
|
|
||||||
|
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
make -C tests/<subdir> tests
|
||||||
|
```
|
||||||
|
If you want to build all the tests at once just use `make tests`.
|
||||||
|
|
||||||
|
### Build configuration options
|
||||||
|
|
||||||
|
- `--prefix=<path>`: installation prefix for Grid.
|
||||||
|
- `--with-gmp=<path>`: look for GMP in the UNIX prefix `<path>`
|
||||||
|
- `--with-mpfr=<path>`: look for MPFR in the UNIX prefix `<path>`
|
||||||
|
- `--with-fftw=<path>`: look for FFTW in the UNIX prefix `<path>`
|
||||||
|
- `--enable-lapack[=<path>]`: enable LAPACK support in Lanczos eigensolver. A UNIX prefix containing the library can be specified (optional).
|
||||||
|
- `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
|
||||||
|
- `--enable-numa`: ???
|
||||||
|
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
|
||||||
|
- `--enable-precision={single|double}`: set the default precision (default: `double`).
|
||||||
|
- `--enable-comms=<comm>`: use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below.
|
||||||
|
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48`).
|
||||||
|
- `--disable-timers`: disable system dependent high-resolution timers.
|
||||||
|
- `--enable-chroma`: enable Chroma regression tests.
|
||||||
|
|
||||||
|
### Possible communication interfaces
|
||||||
|
|
||||||
|
The following options can be used with the `--enable-comms=` option to target different communication interfaces:
|
||||||
|
|
||||||
|
| `<comm>` | Description |
|
||||||
|
| -------------- | ------------------------------------------------------------- |
|
||||||
|
| `none` | no communications |
|
||||||
|
| `mpi[-auto]` | MPI communications |
|
||||||
|
| `mpi3[-auto]` | MPI communications using MPI 3 shared memory |
|
||||||
|
| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model |
|
||||||
|
| `shmem ` | Cray SHMEM communications |
|
||||||
|
|
||||||
|
For the MPI interfaces the optional `-auto` suffix instructs the `configure` script to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified, `configure` will scan through a list of default names).
|
||||||
|
|
||||||
|
### Possible SIMD types
|
||||||
|
|
||||||
|
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
|
||||||
|
|
||||||
|
| `<code>` | Description |
|
||||||
|
| ----------- | -------------------------------------- |
|
||||||
|
| `GEN` | generic portable vector code |
|
||||||
|
| `SSE4` | SSE 4.2 (128 bit) |
|
||||||
|
| `AVX` | AVX (256 bit) |
|
||||||
|
| `AVXFMA` | AVX (256 bit) + FMA |
|
||||||
|
| `AVXFMA4` | AVX (256 bit) + FMA4 |
|
||||||
|
| `AVX2` | AVX 2 (256 bit) |
|
||||||
|
| `AVX512` | AVX 512 bit |
|
||||||
|
| `QPX` | QPX (256 bit) |
|
||||||
|
|
||||||
|
Alternatively, some CPU codenames can be directly used:
|
||||||
|
|
||||||
|
| `<code>` | Description |
|
||||||
|
| ----------- | -------------------------------------- |
|
||||||
|
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
|
||||||
|
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
|
||||||
|
| `BGQ` | Blue Gene/Q |
|
||||||
|
|
||||||
|
#### Notes:
|
||||||
|
- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions of Grid once AVX512 support within GCC and clang is more advanced.
|
||||||
|
- For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform.
|
||||||
|
- BG/Q performance is currently rather poor. This is being investigated for future versions.
|
||||||
|
|
||||||
|
### Build setup for Intel Knights Landing platform
|
||||||
|
|
||||||
|
The following configuration is recommended for the Intel Knights Landing platform:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
../configure --enable-precision=double \
|
||||||
|
--enable-simd=KNL \
|
||||||
|
--enable-comms=mpi-auto \
|
||||||
|
--with-gmp=<path> \
|
||||||
|
--with-mpfr=<path> \
|
||||||
|
--enable-mkl \
|
||||||
|
CXX=icpc MPICXX=mpiicpc
|
||||||
|
```
|
||||||
|
|
||||||
|
where `<path>` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
../configure --enable-precision=double \
|
||||||
|
--enable-simd=KNL \
|
||||||
|
--enable-comms=mpi \
|
||||||
|
--with-gmp=<path> \
|
||||||
|
--with-mpfr=<path> \
|
||||||
|
--enable-mkl \
|
||||||
|
CXX=CC CC=cc
|
||||||
|
```
|
4
VERSION
4
VERSION
@ -1,4 +1,6 @@
|
|||||||
Version : 0.5.0
|
Version : 0.6.0
|
||||||
|
|
||||||
- AVX512, AVX2, AVX, SSE good
|
- AVX512, AVX2, AVX, SSE good
|
||||||
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
|
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
|
||||||
|
- MPI and MPI3
|
||||||
|
- HiRep, Smearing, Generic gauge group
|
||||||
|
@ -25,7 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -42,15 +42,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
int Nloop=10;
|
int Nloop=10;
|
||||||
int nmu=0;
|
int nmu=0;
|
||||||
for(int mu=0;mu<4;mu++) if (mpi_layout[mu]>1) nmu++;
|
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
int maxlat=16;
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=2){
|
|
||||||
for(int Ls=1;Ls<=16;Ls*=2){
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat*mpi_layout[0],
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
@ -125,7 +124,7 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
for(int lat=4;lat<=32;lat+=2){
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
for(int Ls=1;Ls<=16;Ls*=2){
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
std::vector<int> latt_size ({lat,lat,lat,lat});
|
std::vector<int> latt_size ({lat,lat,lat,lat});
|
||||||
@ -195,6 +194,168 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Nloop=100;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
Grid.ShmBufferFreeAll();
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
}
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
Grid.Barrier();
|
||||||
|
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start; // microseconds
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Nloop=100;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=4;lat<=maxlat;lat+=2){
|
||||||
|
for(int Ls=1;Ls<=16;Ls*=2){
|
||||||
|
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],
|
||||||
|
lat*mpi_layout[1],
|
||||||
|
lat*mpi_layout[2],
|
||||||
|
lat*mpi_layout[3]});
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
std::vector<HalfSpinColourVectorD *> xbuf(8);
|
||||||
|
std::vector<HalfSpinColourVectorD *> rbuf(8);
|
||||||
|
Grid.ShmBufferFreeAll();
|
||||||
|
for(int d=0;d<8;d++){
|
||||||
|
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
|
||||||
|
}
|
||||||
|
|
||||||
|
int ncomm;
|
||||||
|
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
for(int i=0;i<Nloop;i++){
|
||||||
|
|
||||||
|
std::vector<CartesianCommunicator::CommsRequest_t> requests;
|
||||||
|
|
||||||
|
ncomm=0;
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
|
||||||
|
if (mpi_layout[mu]>1 ) {
|
||||||
|
|
||||||
|
ncomm++;
|
||||||
|
int comm_proc=1;
|
||||||
|
int xmit_to_rank;
|
||||||
|
int recv_from_rank;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
// Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
// requests.resize(0);
|
||||||
|
|
||||||
|
comm_proc = mpi_layout[mu]-1;
|
||||||
|
|
||||||
|
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
|
||||||
|
Grid.StencilSendToRecvFromBegin(requests,
|
||||||
|
(void *)&xbuf[mu+4][0],
|
||||||
|
xmit_to_rank,
|
||||||
|
(void *)&rbuf[mu+4][0],
|
||||||
|
recv_from_rank,
|
||||||
|
bytes);
|
||||||
|
Grid.StencilSendToRecvFromComplete(requests);
|
||||||
|
requests.resize(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Grid.Barrier();
|
||||||
|
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
|
||||||
|
double dbytes = bytes;
|
||||||
|
double xbytes = Nloop*dbytes*2.0*ncomm;
|
||||||
|
double rbytes = xbytes;
|
||||||
|
double bidibytes = xbytes+rbytes;
|
||||||
|
|
||||||
|
double time = stop-start; // microseconds
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -45,25 +44,20 @@ struct scal {
|
|||||||
Gamma::GammaT
|
Gamma::GammaT
|
||||||
};
|
};
|
||||||
|
|
||||||
bool overlapComms = false;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
|
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
|
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
int main (int argc, char ** argv)
|
||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
|
|
||||||
overlapComms = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
|
||||||
std::vector<int> latt4 = GridDefaultLatt();
|
std::vector<int> latt4 = GridDefaultLatt();
|
||||||
const int Ls=16;
|
const int Ls=8;
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
@ -71,8 +65,8 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
|
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
std::vector<int> seeds4({1,2,3,4});
|
||||||
@ -87,8 +81,6 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion tmp(FGrid);
|
LatticeFermion tmp(FGrid);
|
||||||
LatticeFermion err(FGrid);
|
LatticeFermion err(FGrid);
|
||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
LatticeGaugeField Umu(UGrid);
|
||||||
random(RNG4,Umu);
|
random(RNG4,Umu);
|
||||||
|
|
||||||
@ -127,21 +119,27 @@ int main (int argc, char ** argv)
|
|||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
typename DomainWallFermionR::ImplParams params;
|
|
||||||
params.overlapCommsCompute = overlapComms;
|
|
||||||
|
|
||||||
RealD NP = UGrid->_Nprocessors;
|
RealD NP = UGrid->_Nprocessors;
|
||||||
|
|
||||||
for(int doasm=1;doasm<2;doasm++){
|
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=doasm;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
int ncall =100;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
|
||||||
int ncall =10;
|
|
||||||
if (1) {
|
if (1) {
|
||||||
|
FGrid->Barrier();
|
||||||
|
Dw.ZeroCounters();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
__SSC_START;
|
||||||
@ -149,6 +147,7 @@ int main (int argc, char ** argv)
|
|||||||
__SSC_STOP;
|
__SSC_STOP;
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
@ -157,19 +156,32 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
// Dw.Report();
|
assert (norm2(err)< 1.0e-4 );
|
||||||
|
Dw.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
{
|
{
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::Dhop "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
|
||||||
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
LatticeFermion ssrc(sFGrid);
|
LatticeFermion ssrc(sFGrid);
|
||||||
LatticeFermion sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermion sresult(sFGrid);
|
LatticeFermion sresult(sFGrid);
|
||||||
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
|
|
||||||
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
@ -181,21 +193,24 @@ int main (int argc, char ** argv)
|
|||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
|
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
|
||||||
|
FGrid->Barrier();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
|
sDw.ZeroCounters();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
__SSC_START;
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
__SSC_STOP;
|
__SSC_STOP;
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=1344*volume*ncall;
|
double flops=1344*volume*ncall;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
// sDw.Report();
|
sDw.Report();
|
||||||
|
|
||||||
if(0){
|
if(0){
|
||||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||||
@ -208,9 +223,9 @@ int main (int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||||
|
|
||||||
|
RealD sum=0;
|
||||||
RealF sum=0;
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
for(int z=0;z<latt4[2];z++){
|
for(int z=0;z<latt4[2];z++){
|
||||||
@ -221,17 +236,19 @@ int main (int argc, char ** argv)
|
|||||||
peekSite(normal,result,site);
|
peekSite(normal,result,site);
|
||||||
peekSite(simd,sresult,site);
|
peekSite(simd,sresult,site);
|
||||||
sum=sum+norm2(normal-simd);
|
sum=sum+norm2(normal-simd);
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
if (norm2(normal-simd) > 1.0e-6 ) {
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||||
// std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||||
|
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||||
|
}
|
||||||
}}}}}
|
}}}}}
|
||||||
std::cout<<" difference between normal and simd is "<<sum<<std::endl;
|
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||||
|
assert (sum< 1.0e-4 );
|
||||||
|
|
||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
|
|
||||||
LatticeFermion sr_eo(sFGrid);
|
LatticeFermion sr_eo(sFGrid);
|
||||||
LatticeFermion serr(sFGrid);
|
|
||||||
|
|
||||||
LatticeFermion ssrc_e (sFrbGrid);
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
LatticeFermion ssrc_o (sFrbGrid);
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
@ -243,23 +260,37 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
setCheckerboard(sr_eo,ssrc_o);
|
setCheckerboard(sr_eo,ssrc_o);
|
||||||
setCheckerboard(sr_eo,ssrc_e);
|
setCheckerboard(sr_eo,ssrc_e);
|
||||||
serr = sr_eo-ssrc;
|
|
||||||
std::cout<<GridLogMessage << "EO src norm diff "<< norm2(serr)<<std::endl;
|
|
||||||
|
|
||||||
sr_e = zero;
|
sr_e = zero;
|
||||||
sr_o = zero;
|
sr_o = zero;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
|
||||||
|
FGrid->Barrier();
|
||||||
|
sDw.ZeroCounters();
|
||||||
|
sDw.stat.init("DhopEO");
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for (int i = 0; i < ncall; i++) {
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
sDw.stat.print();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
sDw.Report();
|
||||||
|
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
|
sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
|
||||||
@ -268,9 +299,18 @@ int main (int argc, char ** argv)
|
|||||||
pickCheckerboard(Even,ssrc_e,sresult);
|
pickCheckerboard(Even,ssrc_e,sresult);
|
||||||
pickCheckerboard(Odd ,ssrc_o,sresult);
|
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||||
ssrc_e = ssrc_e - sr_e;
|
ssrc_e = ssrc_e - sr_e;
|
||||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<<std::endl;
|
RealD error = norm2(ssrc_e);
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||||
ssrc_o = ssrc_o - sr_o;
|
ssrc_o = ssrc_o - sr_o;
|
||||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<<std::endl;
|
|
||||||
|
error+= norm2(ssrc_o);
|
||||||
|
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||||
|
if(error>1.0e-4) {
|
||||||
|
setCheckerboard(ssrc,ssrc_o);
|
||||||
|
setCheckerboard(ssrc,ssrc_e);
|
||||||
|
std::cout<< ssrc << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -284,24 +324,25 @@ int main (int argc, char ** argv)
|
|||||||
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
|
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
for(int i=0;i<ref._odata.size();i++){
|
||||||
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
tmp =adj(U[mu])*src;
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
tmp =Cshift(tmp,mu+1,-1);
|
||||||
for(int i=0;i<ref._odata.size();i++){
|
for(int i=0;i<ref._odata.size();i++){
|
||||||
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ref = -0.5*ref;
|
ref = -0.5*ref;
|
||||||
}
|
}
|
||||||
Dw.Dhop(src,result,1);
|
Dw.Dhop(src,result,1);
|
||||||
|
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
|
||||||
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
assert(norm2(err)<1.0e-4);
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
LatticeFermion src_o (FrbGrid);
|
LatticeFermion src_o (FrbGrid);
|
||||||
LatticeFermion r_e (FrbGrid);
|
LatticeFermion r_e (FrbGrid);
|
||||||
@ -309,25 +350,38 @@ int main (int argc, char ** argv)
|
|||||||
LatticeFermion r_eo (FGrid);
|
LatticeFermion r_eo (FGrid);
|
||||||
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Deo and Doe"<<std::endl;
|
std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
|
||||||
pickCheckerboard(Even,src_e,src);
|
pickCheckerboard(Even,src_e,src);
|
||||||
pickCheckerboard(Odd,src_o,src);
|
pickCheckerboard(Odd,src_o,src);
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
{
|
{
|
||||||
|
Dw.ZeroCounters();
|
||||||
|
FGrid->Barrier();
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
FGrid->Barrier();
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
|
Dw.Report();
|
||||||
}
|
}
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||||
@ -342,14 +396,14 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
err = r_eo-result;
|
err = r_eo-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
assert(norm2(err)<1.0e-4);
|
||||||
|
|
||||||
pickCheckerboard(Even,src_e,err);
|
pickCheckerboard(Even,src_e,err);
|
||||||
pickCheckerboard(Odd,src_o,err);
|
pickCheckerboard(Odd,src_o,err);
|
||||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||||
|
assert(norm2(src_e)<1.0e-4);
|
||||||
|
assert(norm2(src_o)<1.0e-4);
|
||||||
}
|
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -1,154 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./benchmarks/Benchmark_dwf.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid.h>
|
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
using namespace Grid;
|
|
||||||
using namespace Grid::QCD;
|
|
||||||
|
|
||||||
template<class d>
|
|
||||||
struct scal {
|
|
||||||
d internal;
|
|
||||||
};
|
|
||||||
|
|
||||||
Gamma::GammaMatrix Gmu [] = {
|
|
||||||
Gamma::GammaX,
|
|
||||||
Gamma::GammaY,
|
|
||||||
Gamma::GammaZ,
|
|
||||||
Gamma::GammaT
|
|
||||||
};
|
|
||||||
|
|
||||||
bool overlapComms = false;
|
|
||||||
|
|
||||||
|
|
||||||
int main (int argc, char ** argv)
|
|
||||||
{
|
|
||||||
Grid_init(&argc,&argv);
|
|
||||||
|
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){
|
|
||||||
overlapComms = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
|
||||||
|
|
||||||
std::vector<int> latt4 = GridDefaultLatt();
|
|
||||||
const int Ls=16;
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
|
||||||
|
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
|
||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
|
||||||
|
|
||||||
LatticeFermion src (FGrid); random(RNG5,src);
|
|
||||||
LatticeFermion result(FGrid); result=zero;
|
|
||||||
LatticeFermion ref(FGrid); ref=zero;
|
|
||||||
LatticeFermion tmp(FGrid);
|
|
||||||
LatticeFermion err(FGrid);
|
|
||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
|
||||||
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
|
||||||
random(RNG4,Umu);
|
|
||||||
|
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
|
||||||
|
|
||||||
// replicate across fifth dimension
|
|
||||||
for(int ss=0;ss<Umu._grid->oSites();ss++){
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////
|
|
||||||
// Naive wilson implementation
|
|
||||||
////////////////////////////////////
|
|
||||||
std::vector<LatticeColourMatrix> U(4,FGrid);
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (1)
|
|
||||||
{
|
|
||||||
ref = zero;
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
|
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
|
||||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
|
||||||
|
|
||||||
tmp =adj(U[mu])*src;
|
|
||||||
tmp =Cshift(tmp,mu+1,-1);
|
|
||||||
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
|
|
||||||
}
|
|
||||||
ref = -0.5*ref;
|
|
||||||
}
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
|
||||||
RealD M5 =1.8;
|
|
||||||
|
|
||||||
typename DomainWallFermionR::ImplParams params;
|
|
||||||
params.overlapCommsCompute = overlapComms;
|
|
||||||
|
|
||||||
RealD NP = UGrid->_Nprocessors;
|
|
||||||
|
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=1;
|
|
||||||
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
|
||||||
int ncall =50;
|
|
||||||
if (1) {
|
|
||||||
|
|
||||||
double t0=usecond();
|
|
||||||
for(int i=0;i<ncall;i++){
|
|
||||||
Dw.Dhop(src,result,0);
|
|
||||||
}
|
|
||||||
double t1=usecond();
|
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
|
||||||
double flops=1344*volume*ncall;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
|
||||||
err = ref-result;
|
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
|
||||||
// Dw.Report();
|
|
||||||
}
|
|
||||||
Grid_finalize();
|
|
||||||
}
|
|
@ -26,8 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
@ -52,22 +51,26 @@ int main (int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
Grid_init(&argc,&argv);
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
const int Ls=16;
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
const int Ls=8;
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
if ( getenv("ASMOPT") ) {
|
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=1;
|
|
||||||
} else {
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
|
||||||
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
|
||||||
|
|
||||||
int Lmax=32;
|
int Lmax=16;
|
||||||
int dmin=0;
|
int dmin=2;
|
||||||
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
for (int L=8;L<=Lmax;L*=2){
|
for (int L=8;L<=Lmax;L*=2){
|
||||||
@ -126,7 +129,6 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
|
|
||||||
ColourMatrix cm = Complex(1.0,0.0);
|
ColourMatrix cm = Complex(1.0,0.0);
|
||||||
|
|
||||||
|
|
||||||
LatticeGaugeField Umu5d(FGrid);
|
LatticeGaugeField Umu5d(FGrid);
|
||||||
|
|
||||||
// replicate across fifth dimension
|
// replicate across fifth dimension
|
||||||
@ -145,11 +147,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CHECK
|
#ifdef CHECK
|
||||||
if (1)
|
if (1) {
|
||||||
{
|
|
||||||
ref = zero;
|
ref = zero;
|
||||||
for(int mu=0;mu<Nd;mu++){
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
tmp = U[mu]*Cshift(src,mu+1,1);
|
tmp = U[mu]*Cshift(src,mu+1,1);
|
||||||
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
|
||||||
|
|
||||||
@ -193,20 +194,19 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
Counter.Report();
|
Counter.Report();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! report )
|
if ( ! report ) {
|
||||||
{
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double flops=1344*volume*ncall;
|
||||||
double flops=1344*volume*ncall;
|
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
||||||
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CHECK
|
#ifdef CHECK
|
||||||
err = ref-result;
|
err = ref-result;
|
||||||
RealD errd = norm2(err);
|
RealD errd = norm2(err);
|
||||||
if ( errd> 1.0e-4 ) {
|
if ( errd> 1.0e-4 ) {
|
||||||
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
LatticeFermion src_e (FrbGrid);
|
LatticeFermion src_e (FrbGrid);
|
||||||
@ -232,10 +232,9 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
|
|||||||
std::cout<< flops/(t1-t0);
|
std::cout<< flops/(t1-t0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef CHECK_SDW
|
#define CHECK_SDW
|
||||||
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -243,7 +242,9 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
|||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
|
||||||
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
|
||||||
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
@ -277,93 +278,89 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
RealD mass=0.1;
|
||||||
RealD M5 =1.8;
|
RealD M5 =1.8;
|
||||||
|
|
||||||
typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
|
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
|
||||||
LatticeFermion ssrc(sFGrid);
|
LatticeFermion ssrc(sFGrid);
|
||||||
LatticeFermion sref(sFGrid);
|
LatticeFermion sref(sFGrid);
|
||||||
LatticeFermion sresult(sFGrid);
|
LatticeFermion sresult(sFGrid);
|
||||||
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
|
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
|
||||||
|
|
||||||
for(int x=0;x<latt4[0];x++){
|
for(int x=0;x<latt4[0];x++){
|
||||||
for(int y=0;y<latt4[1];y++){
|
for(int y=0;y<latt4[1];y++){
|
||||||
for(int z=0;z<latt4[2];z++){
|
for(int z=0;z<latt4[2];z++){
|
||||||
for(int t=0;t<latt4[3];t++){
|
for(int t=0;t<latt4[3];t++){
|
||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
std::vector<int> site({s,x,y,z,t});
|
std::vector<int> site({s,x,y,z,t});
|
||||||
SpinColourVector tmp;
|
SpinColourVector tmp;
|
||||||
peekSite(tmp,src,site);
|
peekSite(tmp,src,site);
|
||||||
pokeSite(tmp,ssrc,site);
|
pokeSite(tmp,ssrc,site);
|
||||||
}}}}}
|
}}}}}
|
||||||
|
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
|
|
||||||
#ifdef TIMERS_OFF
|
#ifdef TIMERS_OFF
|
||||||
int ncall =10;
|
int ncall =10;
|
||||||
#else
|
#else
|
||||||
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PerformanceCounter Counter(8);
|
PerformanceCounter Counter(8);
|
||||||
Counter.Start();
|
Counter.Start();
|
||||||
t0=usecond();
|
t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
sDw.Dhop(ssrc,sresult,0);
|
sDw.Dhop(ssrc,sresult,0);
|
||||||
}
|
}
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
Counter.Stop();
|
Counter.Stop();
|
||||||
|
|
||||||
|
if ( report ) {
|
||||||
|
Counter.Report();
|
||||||
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
|
double flops=1344*volume*ncall;
|
||||||
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
|
}
|
||||||
|
|
||||||
if ( report ) {
|
LatticeFermion sr_eo(sFGrid);
|
||||||
Counter.Report();
|
LatticeFermion serr(sFGrid);
|
||||||
} else {
|
|
||||||
|
LatticeFermion ssrc_e (sFrbGrid);
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
LatticeFermion ssrc_o (sFrbGrid);
|
||||||
double flops=1344*volume*ncall;
|
LatticeFermion sr_e (sFrbGrid);
|
||||||
std::cout<<"\t"<< flops/(t1-t0);
|
LatticeFermion sr_o (sFrbGrid);
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
LatticeFermion sr_eo(sFGrid);
|
|
||||||
LatticeFermion serr(sFGrid);
|
|
||||||
|
|
||||||
LatticeFermion ssrc_e (sFrbGrid);
|
|
||||||
LatticeFermion ssrc_o (sFrbGrid);
|
|
||||||
LatticeFermion sr_e (sFrbGrid);
|
|
||||||
LatticeFermion sr_o (sFrbGrid);
|
|
||||||
|
|
||||||
pickCheckerboard(Even,ssrc_e,ssrc);
|
pickCheckerboard(Even,ssrc_e,ssrc);
|
||||||
pickCheckerboard(Odd,ssrc_o,ssrc);
|
pickCheckerboard(Odd,ssrc_o,ssrc);
|
||||||
|
|
||||||
setCheckerboard(sr_eo,ssrc_o);
|
setCheckerboard(sr_eo,ssrc_o);
|
||||||
setCheckerboard(sr_eo,ssrc_e);
|
setCheckerboard(sr_eo,ssrc_e);
|
||||||
|
|
||||||
sr_e = zero;
|
|
||||||
sr_o = zero;
|
|
||||||
|
|
||||||
|
sr_e = zero;
|
||||||
|
sr_o = zero;
|
||||||
|
|
||||||
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
|
PerformanceCounter CounterSdw(8);
|
||||||
|
CounterSdw.Start();
|
||||||
|
t0=usecond();
|
||||||
|
for(int i=0;i<ncall;i++){
|
||||||
|
__SSC_START;
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||||
PerformanceCounter CounterSdw(8);
|
__SSC_STOP;
|
||||||
CounterSdw.Start();
|
}
|
||||||
t0=usecond();
|
t1=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
CounterSdw.Stop();
|
||||||
__SSC_START;
|
|
||||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
|
||||||
__SSC_STOP;
|
|
||||||
}
|
|
||||||
t1=usecond();
|
|
||||||
CounterSdw.Stop();
|
|
||||||
|
|
||||||
if ( report ) {
|
if ( report ) {
|
||||||
CounterSdw.Report();
|
CounterSdw.Report();
|
||||||
} else {
|
} else {
|
||||||
|
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
double flops=(1344.0*volume*ncall)/2;
|
||||||
double flops=(1344.0*volume*ncall)/2;
|
std::cout<<"\t"<< flops/(t1-t0);
|
||||||
std::cout<<"\t"<< flops/(t1-t0);
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -26,7 +26,7 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
130
benchmarks/Benchmark_wilson_sweep.cc
Normal file
130
benchmarks/Benchmark_wilson_sweep.cc
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
Source file: ./benchmarks/Benchmark_wilson_sweep.cc
|
||||||
|
Copyright (C) 2015
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Richard Rollins <rprollins@users.noreply.github.com>
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
using namespace Grid::QCD;
|
||||||
|
|
||||||
|
template<class d>
|
||||||
|
struct scal {
|
||||||
|
d internal;
|
||||||
|
};
|
||||||
|
|
||||||
|
Gamma::GammaMatrix Gmu [] = {
|
||||||
|
Gamma::GammaX,
|
||||||
|
Gamma::GammaY,
|
||||||
|
Gamma::GammaZ,
|
||||||
|
Gamma::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
bool overlapComms = false;
|
||||||
|
|
||||||
|
void bench_wilson (
|
||||||
|
LatticeFermion & src,
|
||||||
|
LatticeFermion & result,
|
||||||
|
WilsonFermionR & Dw,
|
||||||
|
double const volume,
|
||||||
|
int const dag );
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
|
||||||
|
typename WilsonFermionR::ImplParams params;
|
||||||
|
params.overlapCommsCompute = overlapComms;
|
||||||
|
|
||||||
|
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
||||||
|
std::vector<int> mpi_layout = GridDefaultMpi();
|
||||||
|
std::vector<int> seeds({1,2,3,4});
|
||||||
|
RealD mass = 0.1;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Benchmarking WilsonFermionR::Dhop "<<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
|
||||||
|
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
|
||||||
|
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
|
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
|
||||||
|
int Lmax = 32;
|
||||||
|
int dmin = 0;
|
||||||
|
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
|
||||||
|
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
|
||||||
|
for (int L=8; L<=Lmax; L*=2)
|
||||||
|
{
|
||||||
|
std::vector<int> latt_size = std::vector<int>(4,L);
|
||||||
|
for(int d=4; d>dmin; d--)
|
||||||
|
{
|
||||||
|
if ( d<=3 ) { latt_size[d] *= 2; }
|
||||||
|
|
||||||
|
std::cout << GridLogMessage;
|
||||||
|
std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
|
||||||
|
std::cout << latt_size.back() << "\t\t";
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
|
||||||
|
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
|
||||||
|
LatticeFermion src(&Grid); random(pRNG,src);
|
||||||
|
LatticeFermion result(&Grid); result=zero;
|
||||||
|
|
||||||
|
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
|
||||||
|
|
||||||
|
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
|
||||||
|
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerNo);
|
||||||
|
bench_wilson(src,result,Dw,volume,DaggerYes);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void bench_wilson (
|
||||||
|
LatticeFermion & src,
|
||||||
|
LatticeFermion & result,
|
||||||
|
WilsonFermionR & Dw,
|
||||||
|
double const volume,
|
||||||
|
int const dag )
|
||||||
|
{
|
||||||
|
int ncall = 1000;
|
||||||
|
double t0 = usecond();
|
||||||
|
for(int i=0; i<ncall; i++) { Dw.Dhop(src,result,dag); }
|
||||||
|
double t1 = usecond();
|
||||||
|
double flops = 1344 * volume * ncall;
|
||||||
|
std::cout << flops/(t1-t0) << "\t\t";
|
||||||
|
}
|
@ -1,172 +0,0 @@
|
|||||||
/*************************************************************************************
|
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
|
||||||
|
|
||||||
Source file: ./tests/Test_zmm.cc
|
|
||||||
|
|
||||||
Copyright (C) 2015
|
|
||||||
|
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
|
||||||
*************************************************************************************/
|
|
||||||
/* END LEGAL */
|
|
||||||
#include <Grid.h>
|
|
||||||
#include <PerfCount.h>
|
|
||||||
|
|
||||||
|
|
||||||
using namespace Grid;
|
|
||||||
using namespace Grid::QCD;
|
|
||||||
|
|
||||||
|
|
||||||
int bench(std::ofstream &os, std::vector<int> &latt4,int Ls);
|
|
||||||
|
|
||||||
int main(int argc,char **argv)
|
|
||||||
{
|
|
||||||
Grid_init(&argc,&argv);
|
|
||||||
std::ofstream os("zmm.dat");
|
|
||||||
|
|
||||||
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
|
|
||||||
for(int L=4;L<=32;L+=4){
|
|
||||||
for(int m=1;m<=2;m++){
|
|
||||||
for(int Ls=8;Ls<=16;Ls+=8){
|
|
||||||
std::vector<int> grid({L,L,m*L,m*L});
|
|
||||||
for(int i=0;i<4;i++) {
|
|
||||||
std::cout << grid[i]<<"x";
|
|
||||||
}
|
|
||||||
std::cout << Ls<<std::endl;
|
|
||||||
bench(os,grid,Ls);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
|
|
||||||
{
|
|
||||||
|
|
||||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
|
|
||||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
|
||||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
|
||||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
|
||||||
|
|
||||||
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
|
|
||||||
std::vector<int> mpi_layout = GridDefaultMpi();
|
|
||||||
int threads = GridThread::GetThreads();
|
|
||||||
|
|
||||||
std::vector<int> seeds4({1,2,3,4});
|
|
||||||
std::vector<int> seeds5({5,6,7,8});
|
|
||||||
|
|
||||||
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4);
|
|
||||||
|
|
||||||
LatticeFermion src (FGrid);
|
|
||||||
LatticeFermion tmp (FGrid);
|
|
||||||
LatticeFermion srce(FrbGrid);
|
|
||||||
|
|
||||||
LatticeFermion resulto(FrbGrid); resulto=zero;
|
|
||||||
LatticeFermion resulta(FrbGrid); resulta=zero;
|
|
||||||
LatticeFermion junk(FrbGrid); junk=zero;
|
|
||||||
LatticeFermion diff(FrbGrid);
|
|
||||||
LatticeGaugeField Umu(UGrid);
|
|
||||||
|
|
||||||
double mfc, mfa, mfo, mfl1;
|
|
||||||
|
|
||||||
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
|
||||||
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
|
|
||||||
random(RNG5,src);
|
|
||||||
#if 1
|
|
||||||
random(RNG4,Umu);
|
|
||||||
#else
|
|
||||||
int mmu=2;
|
|
||||||
std::vector<LatticeColourMatrix> U(4,UGrid);
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
if ( mu!=mmu ) U[mu] = zero;
|
|
||||||
if ( mu==mmu ) U[mu] = 1.0;
|
|
||||||
PokeIndex<LorentzIndex>(Umu,U[mu],mu);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
pickCheckerboard(Even,srce,src);
|
|
||||||
|
|
||||||
RealD mass=0.1;
|
|
||||||
RealD M5 =1.8;
|
|
||||||
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
|
|
||||||
int ncall=50;
|
|
||||||
double t0=usecond();
|
|
||||||
for(int i=0;i<ncall;i++){
|
|
||||||
Dw.DhopOE(srce,resulto,0);
|
|
||||||
}
|
|
||||||
double t1=usecond();
|
|
||||||
|
|
||||||
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
|
|
||||||
double flops=1344*volume/2;
|
|
||||||
|
|
||||||
mfc = flops*ncall/(t1-t0);
|
|
||||||
std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s = "<< mfc<<std::endl;
|
|
||||||
|
|
||||||
QCD::WilsonKernelsStatic::AsmOpt=1;
|
|
||||||
t0=usecond();
|
|
||||||
for(int i=0;i<ncall;i++){
|
|
||||||
Dw.DhopOE(srce,resulta,0);
|
|
||||||
}
|
|
||||||
t1=usecond();
|
|
||||||
mfa = flops*ncall/(t1-t0);
|
|
||||||
std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s = "<< mfa<<std::endl;
|
|
||||||
/*
|
|
||||||
int dag=DaggerNo;
|
|
||||||
t0=usecond();
|
|
||||||
for(int i=0;i<1;i++){
|
|
||||||
Dw.DhopInternalOMPbench(Dw.StencilEven,Dw.LebesgueEvenOdd,Dw.UmuOdd,srce,resulta,dag);
|
|
||||||
}
|
|
||||||
t1=usecond();
|
|
||||||
mfo = flops*100/(t1-t0);
|
|
||||||
std::cout<<GridLogMessage << "Called ASM-OMP Dw"<< " mflop/s = "<< mfo<<std::endl;
|
|
||||||
|
|
||||||
t0=usecond();
|
|
||||||
for(int i=0;i<1;i++){
|
|
||||||
Dw.DhopInternalL1bench(Dw.StencilEven,Dw.LebesgueEvenOdd,Dw.UmuOdd,srce,resulta,dag);
|
|
||||||
}
|
|
||||||
t1=usecond();
|
|
||||||
mfl1= flops*100/(t1-t0);
|
|
||||||
std::cout<<GridLogMessage << "Called ASM-L1 Dw"<< " mflop/s = "<< mfl1<<std::endl;
|
|
||||||
os << latt4[0]*latt4[1]*latt4[2]*latt4[3]<< " "<<Ls<<" "<< latt4[0] <<" " <<latt4[2]<< " "
|
|
||||||
<< mfc<<" "
|
|
||||||
<< mfa<<" "
|
|
||||||
<< mfo<<" "
|
|
||||||
<< mfl1<<std::endl;
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
|
||||||
Dw.DhopOE(srce,resulta,0);
|
|
||||||
PerformanceCounter Counter(i);
|
|
||||||
Counter.Start();
|
|
||||||
Dw.DhopOE(srce,resulta,0);
|
|
||||||
Counter.Stop();
|
|
||||||
Counter.Report();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
//resulta = (-0.5) * resulta;
|
|
||||||
|
|
||||||
diff = resulto-resulta;
|
|
||||||
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
|
|
||||||
std::cout<<std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,39 +0,0 @@
|
|||||||
|
|
||||||
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_dwf_ntpf Benchmark_dwf_sweep Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_comms_SOURCES=Benchmark_comms.cc
|
|
||||||
Benchmark_comms_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
|
|
||||||
Benchmark_dwf_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_ntpf_SOURCES=Benchmark_dwf_ntpf.cc
|
|
||||||
Benchmark_dwf_ntpf_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_dwf_sweep_SOURCES=Benchmark_dwf_sweep.cc
|
|
||||||
Benchmark_dwf_sweep_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
|
|
||||||
Benchmark_memory_asynch_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
|
|
||||||
Benchmark_memory_bandwidth_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_su3_SOURCES=Benchmark_su3.cc
|
|
||||||
Benchmark_su3_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
|
|
||||||
Benchmark_wilson_LDADD=-lGrid
|
|
||||||
|
|
||||||
|
|
||||||
Benchmark_zmm_SOURCES=Benchmark_zmm.cc
|
|
||||||
Benchmark_zmm_LDADD=-lGrid
|
|
||||||
|
|
@ -1,8 +1 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/lib
|
|
||||||
AM_LDFLAGS = -L$(top_builddir)/lib
|
|
||||||
|
|
||||||
#
|
|
||||||
# Test code
|
|
||||||
#
|
|
||||||
include Make.inc
|
include Make.inc
|
||||||
|
13
bootstrap.sh
Executable file
13
bootstrap.sh
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
|
||||||
|
|
||||||
|
echo "-- deploying Eigen source..."
|
||||||
|
wget ${EIGEN_URL} --no-check-certificate
|
||||||
|
./scripts/update_eigen.sh `basename ${EIGEN_URL}`
|
||||||
|
rm `basename ${EIGEN_URL}`
|
||||||
|
|
||||||
|
echo '-- generating Make.inc files...'
|
||||||
|
./scripts/filelist
|
||||||
|
echo '-- generating configure script...'
|
||||||
|
autoreconf -fvi
|
530
configure.ac
530
configure.ac
@ -1,315 +1,405 @@
|
|||||||
# -*- Autoconf -*-
|
|
||||||
# Process this file with autoconf to produce a configure script.
|
|
||||||
#
|
|
||||||
# Project Grid package
|
|
||||||
#
|
|
||||||
# Time-stamp: <2015-07-10 17:46:21 neo>
|
|
||||||
|
|
||||||
AC_PREREQ([2.63])
|
AC_PREREQ([2.63])
|
||||||
AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
|
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
|
||||||
AC_CANONICAL_SYSTEM
|
AC_CANONICAL_BUILD
|
||||||
|
AC_CANONICAL_HOST
|
||||||
|
AC_CANONICAL_TARGET
|
||||||
AM_INIT_AUTOMAKE(subdir-objects)
|
AM_INIT_AUTOMAKE(subdir-objects)
|
||||||
AC_CONFIG_MACRO_DIR([m4])
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
AC_CONFIG_SRCDIR([lib/Grid.h])
|
AC_CONFIG_SRCDIR([lib/Grid.h])
|
||||||
AC_CONFIG_HEADERS([lib/Config.h])
|
AC_CONFIG_HEADERS([lib/Config.h])
|
||||||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||||
|
|
||||||
AC_MSG_NOTICE([
|
############### Checks for programs
|
||||||
|
CXXFLAGS="-O3 $CXXFLAGS"
|
||||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
Configuring $PACKAGE v$VERSION for $host
|
|
||||||
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
])
|
|
||||||
|
|
||||||
# Checks for programs.
|
|
||||||
AC_LANG(C++)
|
|
||||||
AC_PROG_CXX
|
AC_PROG_CXX
|
||||||
AC_OPENMP
|
|
||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
|
|
||||||
AX_EXT
|
|
||||||
|
|
||||||
# Checks for libraries.
|
############### Get compiler information
|
||||||
#AX_GCC_VAR_ATTRIBUTE(aligned)
|
AC_LANG([C++])
|
||||||
|
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
|
||||||
|
AX_COMPILER_VENDOR
|
||||||
|
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
|
||||||
|
[vendor of C++ compiler that will compile the code])
|
||||||
|
AX_GXX_VERSION
|
||||||
|
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
|
||||||
|
[version of g++ that will compile the code])
|
||||||
|
|
||||||
# Checks for header files.
|
############### Checks for typedefs, structures, and compiler characteristics
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
AC_TYPE_UINT32_T
|
||||||
|
AC_TYPE_UINT64_T
|
||||||
|
|
||||||
|
############### OpenMP
|
||||||
|
AC_OPENMP
|
||||||
|
ac_openmp=no
|
||||||
|
if test "${OPENMP_CXXFLAGS}X" != "X"; then
|
||||||
|
ac_openmp=yes
|
||||||
|
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
############### Checks for header files
|
||||||
AC_CHECK_HEADERS(stdint.h)
|
AC_CHECK_HEADERS(stdint.h)
|
||||||
AC_CHECK_HEADERS(mm_malloc.h)
|
AC_CHECK_HEADERS(mm_malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc/malloc.h)
|
AC_CHECK_HEADERS(malloc/malloc.h)
|
||||||
AC_CHECK_HEADERS(malloc.h)
|
AC_CHECK_HEADERS(malloc.h)
|
||||||
AC_CHECK_HEADERS(endian.h)
|
AC_CHECK_HEADERS(endian.h)
|
||||||
AC_CHECK_HEADERS(execinfo.h)
|
AC_CHECK_HEADERS(execinfo.h)
|
||||||
AC_CHECK_HEADERS(gmp.h)
|
|
||||||
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
|
||||||
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
|
||||||
|
|
||||||
# Checks for typedefs, structures, and compiler characteristics.
|
############### GMP and MPFR
|
||||||
AC_TYPE_SIZE_T
|
AC_ARG_WITH([gmp],
|
||||||
AC_TYPE_UINT32_T
|
[AS_HELP_STRING([--with-gmp=prefix],
|
||||||
AC_TYPE_UINT64_T
|
[try this for a non-standard install prefix of the GMP library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
|
||||||
|
AC_ARG_WITH([mpfr],
|
||||||
|
[AS_HELP_STRING([--with-mpfr=prefix],
|
||||||
|
[try this for a non-standard install prefix of the MPFR library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
# Checks for library functions.
|
############### FFTW3
|
||||||
echo
|
AC_ARG_WITH([fftw],
|
||||||
echo Checking libraries
|
[AS_HELP_STRING([--with-fftw=prefix],
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
[try this for a non-standard install prefix of the FFTW3 library])],
|
||||||
|
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
|
||||||
|
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
|
||||||
|
|
||||||
|
############### lapack
|
||||||
|
AC_ARG_ENABLE([lapack],
|
||||||
|
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
|
||||||
|
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
|
||||||
|
|
||||||
|
case ${ac_LAPACK} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### MKL
|
||||||
|
AC_ARG_ENABLE([mkl],
|
||||||
|
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
|
||||||
|
[ac_MKL=${enable_mkl}], [ac_MKL=no])
|
||||||
|
|
||||||
|
case ${ac_MKL} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
|
*)
|
||||||
|
AM_CXXFLAGS="-I$ac_MKL/include $AM_CXXFLAGS"
|
||||||
|
AM_LDFLAGS="-L$ac_MKL/lib $AM_LDFLAGS"
|
||||||
|
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### first-touch
|
||||||
|
# The value passed to --enable-numa / --disable-numa is stored by Autoconf in
# the lower-case shell variable enable_numa; reading any other spelling yields
# an empty string and makes --disable-numa fall through to the enabling branch.
AC_ARG_ENABLE([numa],
|
||||||
|
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
|
||||||
|
[ac_NUMA=${enable_numa}],[ac_NUMA=no])
|
||||||
|
|
||||||
|
case ${ac_NUMA} in
|
||||||
|
no)
|
||||||
|
;;
|
||||||
|
yes)
|
||||||
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
|
*)
|
||||||
|
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
|
||||||
|
esac
|
||||||
|
|
||||||
|
############### Checks for library functions
|
||||||
|
CXXFLAGS_CPY=$CXXFLAGS
|
||||||
|
LDFLAGS_CPY=$LDFLAGS
|
||||||
|
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
|
||||||
|
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
|
||||||
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
if test "${ac_MKL}x" != "nox"; then
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [],
|
||||||
#Please install or provide the correct path to your installation
|
[AC_MSG_ERROR("MKL enabled but library not found")])
|
||||||
#Info at: http://www.gmplib.org)])
|
fi
|
||||||
|
|
||||||
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
AC_SEARCH_LIBS([__gmpf_init], [gmp],
|
||||||
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
|
||||||
#Please install or provide the correct path to your installation
|
[AC_DEFINE([HAVE_LIBMPFR], [1],
|
||||||
#Info at: http://www.mpfr.org/)])
|
[Define to 1 if you have the `MPFR' library])]
|
||||||
|
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
|
||||||
|
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
|
||||||
|
[have_gmp=true])
|
||||||
|
|
||||||
#
|
if test "${ac_LAPACK}x" != "nox"; then
|
||||||
# SIMD instructions selection
|
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
|
||||||
#
|
[AC_MSG_ERROR("LAPACK enabled but library not found")])
|
||||||
|
fi
|
||||||
|
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
|
AC_SEARCH_LIBS([fftw_execute], [fftw3],
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
|
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
|
||||||
[ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
|
[AC_MSG_ERROR("single precision FFTW library not found")])]
|
||||||
|
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
|
||||||
|
[have_fftw=true])
|
||||||
|
|
||||||
supported=no
|
CXXFLAGS=$CXXFLAGS_CPY
|
||||||
|
LDFLAGS=$LDFLAGS_CPY
|
||||||
|
|
||||||
ac_ZMM=no;
|
############### SIMD instruction selection
|
||||||
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=<code>],
|
||||||
|
[select SIMD target (cf. README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN])
|
||||||
|
|
||||||
|
case ${ax_cv_cxx_compiler_vendor} in
|
||||||
|
clang|gnu)
|
||||||
|
case ${ac_SIMD} in
|
||||||
|
SSE4)
|
||||||
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
|
SIMD_FLAGS='-msse4.2';;
|
||||||
|
AVX)
|
||||||
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx';;
|
||||||
|
AVXFMA4)
|
||||||
|
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
|
||||||
|
SIMD_FLAGS='-mavx -mfma4';;
|
||||||
|
AVXFMA)
|
||||||
|
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVX2)
|
||||||
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx2 -mfma';;
|
||||||
|
AVX512)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
|
||||||
|
KNC)
|
||||||
|
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-march=knl';;
|
||||||
|
GEN)
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
QPX|BGQ)
|
||||||
|
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
|
||||||
|
esac;;
|
||||||
|
intel)
|
||||||
|
case ${ac_SIMD} in
|
||||||
|
SSE4)
|
||||||
|
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
|
||||||
|
SIMD_FLAGS='-msse4.2 -xsse4.2';;
|
||||||
|
AVX)
|
||||||
|
AC_DEFINE([AVX1],[1],[AVX intrinsics])
|
||||||
|
SIMD_FLAGS='-mavx -xavx';;
|
||||||
|
AVXFMA)
|
||||||
|
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
|
||||||
|
SIMD_FLAGS='-mavx -mfma';;
|
||||||
|
AVX2)
|
||||||
|
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
|
||||||
|
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
|
||||||
|
AVX512)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
|
||||||
|
SIMD_FLAGS='-xcore-avx512';;
|
||||||
|
KNC)
|
||||||
|
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
KNL)
|
||||||
|
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
|
||||||
|
SIMD_FLAGS='-xmic-avx512';;
|
||||||
|
GEN)
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
|
||||||
|
SIMD_FLAGS='';;
|
||||||
|
*)
|
||||||
|
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
|
||||||
|
esac;;
|
||||||
|
*)
|
||||||
|
AC_MSG_WARN([Compiler unknown, using generic vector code])
|
||||||
|
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
|
||||||
|
esac
|
||||||
|
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
|
||||||
|
|
||||||
case ${ac_SIMD} in
|
case ${ac_SIMD} in
|
||||||
SSE4)
|
AVX512|KNL)
|
||||||
echo Configuring for SSE4
|
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);;
|
||||||
AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
|
*)
|
||||||
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
|
;;
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support SSE4 instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX)
|
|
||||||
echo Configuring for AVX
|
|
||||||
AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
|
|
||||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVXFMA4)
|
|
||||||
echo Configuring for AVX
|
|
||||||
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
|
|
||||||
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX2)
|
|
||||||
echo Configuring for AVX2
|
|
||||||
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
|
|
||||||
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
|
|
||||||
supported=yes
|
|
||||||
else
|
|
||||||
AC_MSG_WARN([Your processor does not support AVX2 instructions])
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
AVX512)
|
|
||||||
echo Configuring for AVX512
|
|
||||||
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
|
|
||||||
supported="cross compilation"
|
|
||||||
ac_ZMM=yes;
|
|
||||||
;;
|
|
||||||
IMCI)
|
|
||||||
echo Configuring for IMCI
|
|
||||||
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
|
|
||||||
supported="cross compilation"
|
|
||||||
ac_ZMM=no;
|
|
||||||
;;
|
|
||||||
NEONv8)
|
|
||||||
echo Configuring for experimental ARMv8a support
|
|
||||||
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
|
|
||||||
supported="cross compilation"
|
|
||||||
;;
|
|
||||||
DEBUG)
|
|
||||||
echo Configuring without SIMD support - only for compiler DEBUGGING!
|
|
||||||
AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
|
|
||||||
case ${ac_ZMM} in
|
############### Precision selection
|
||||||
yes)
|
AC_ARG_ENABLE([precision],
|
||||||
echo Enabling ZMM source code
|
[AC_HELP_STRING([--enable-precision=single|double],
|
||||||
;;
|
[Select default word size of Real])],
|
||||||
no)
|
[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
||||||
echo Disabling ZMM source code
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ])
|
|
||||||
|
|
||||||
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
|
|
||||||
case ${ac_PRECISION} in
|
case ${ac_PRECISION} in
|
||||||
single)
|
single)
|
||||||
echo default precision is single
|
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
|
||||||
;;
|
;;
|
||||||
double)
|
double)
|
||||||
echo default precision is double
|
|
||||||
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#
|
############### communication type selection
|
||||||
# Comms selection
|
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
|
||||||
#
|
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
||||||
|
|
||||||
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
|
|
||||||
|
|
||||||
case ${ac_COMMS} in
|
case ${ac_COMMS} in
|
||||||
none)
|
none)
|
||||||
echo Configuring for NO communications
|
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
||||||
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
|
comms_type='none'
|
||||||
;;
|
;;
|
||||||
mpi)
|
mpi3l*)
|
||||||
echo Configuring for MPI communications
|
AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] )
|
||||||
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
comms_type='mpi3l'
|
||||||
|
;;
|
||||||
|
mpi3*)
|
||||||
|
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
|
||||||
|
comms_type='mpi3'
|
||||||
|
;;
|
||||||
|
mpi*)
|
||||||
|
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
|
||||||
|
comms_type='mpi'
|
||||||
;;
|
;;
|
||||||
shmem)
|
shmem)
|
||||||
echo Configuring for SHMEM communications
|
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
||||||
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
|
comms_type='shmem'
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
case ${ac_COMMS} in
|
||||||
|
*-auto)
|
||||||
|
LX_FIND_MPI
|
||||||
|
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
|
||||||
|
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
|
||||||
|
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
|
||||||
|
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
|
||||||
|
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS";;
|
||||||
|
*)
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
|
AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" = "shmemX" ]) dnl single = is the portable string comparison for test
|
||||||
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
|
AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" = "mpiX" ]) dnl single = is the portable string comparison for test
|
||||||
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
|
AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" = "mpi3X" ] ) dnl single = is the portable string comparison for test
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" = "mpi3lX" ] ) dnl single = is the portable string comparison for test
|
||||||
|
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" = "noneX" ]) dnl single = is the portable string comparison for test
|
||||||
|
|
||||||
#
|
############### RNG selection
|
||||||
# RNG selection
|
|
||||||
#
|
|
||||||
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
|
||||||
[Select Random Number Generator to be used])],\
|
[Select Random Number Generator to be used])],\
|
||||||
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
|
||||||
|
|
||||||
case ${ac_RNG} in
|
case ${ac_RNG} in
|
||||||
ranlux48)
|
ranlux48)
|
||||||
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
|
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
|
||||||
;;
|
;;
|
||||||
mt19937)
|
mt19937)
|
||||||
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
|
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#
|
############### Timer option
|
||||||
# SDE timing mode
|
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\
|
||||||
#
|
[Enable system dependent high res timers])],\
|
||||||
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers=yes|no],\
|
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
||||||
[Enable system dependent high res timers])],\
|
|
||||||
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
|
|
||||||
case ${ac_TIMERS} in
|
case ${ac_TIMERS} in
|
||||||
yes)
|
yes)
|
||||||
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
|
||||||
;;
|
;;
|
||||||
no)
|
no)
|
||||||
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
|
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
#
|
############### Chroma regression test
|
||||||
# Chroma regression tests
|
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],
|
||||||
#
|
[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
||||||
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
|
|
||||||
|
|
||||||
case ${ac_CHROMA} in
|
case ${ac_CHROMA} in
|
||||||
yes)
|
yes|no)
|
||||||
echo Enabling tests regressing to Chroma
|
|
||||||
;;
|
|
||||||
no)
|
|
||||||
echo Disabling tests regressing to Chroma
|
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
|
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" = "XyesX" ]) dnl single = is the portable string comparison for test
|
||||||
|
|
||||||
#
|
############### Doxygen
|
||||||
# Lapack
|
AC_PROG_DOXYGEN
|
||||||
#
|
|
||||||
AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
|
|
||||||
|
|
||||||
case ${ac_LAPACK} in
|
if test -n "$DOXYGEN"
|
||||||
yes)
|
then
|
||||||
echo Enabling lapack
|
AC_CONFIG_FILES([docs/doxy.cfg])
|
||||||
;;
|
fi
|
||||||
no)
|
|
||||||
echo Disabling lapack
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo Enabling lapack at ${ac_LAPACK}
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
|
############### Output
|
||||||
AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
|
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
|
||||||
|
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
|
||||||
###################################################################
|
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
|
||||||
# Checks for doxygen support
|
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
|
||||||
# if present enables the "make doxyfile" command
|
AC_SUBST([AM_CFLAGS])
|
||||||
#echo
|
AC_SUBST([AM_CXXFLAGS])
|
||||||
#echo Checking doxygen support
|
AC_SUBST([AM_LDFLAGS])
|
||||||
#echo :::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
#AC_PROG_DOXYGEN
|
|
||||||
|
|
||||||
#if test -n "$DOXYGEN"
|
|
||||||
#then
|
|
||||||
#AC_CONFIG_FILES([docs/doxy.cfg])
|
|
||||||
#fi
|
|
||||||
|
|
||||||
echo
|
|
||||||
echo Creating configuration files
|
|
||||||
echo :::::::::::::::::::::::::::::::::::::::::::
|
|
||||||
AC_CONFIG_FILES(Makefile)
|
AC_CONFIG_FILES(Makefile)
|
||||||
AC_CONFIG_FILES(lib/Makefile)
|
AC_CONFIG_FILES(lib/Makefile)
|
||||||
AC_CONFIG_FILES(tests/Makefile)
|
AC_CONFIG_FILES(tests/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/IO/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/core/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/debug/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/forces/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/hmc/Makefile)
|
||||||
|
AC_CONFIG_FILES(tests/solver/Makefile)
|
||||||
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
AC_CONFIG_FILES(tests/qdpxx/Makefile)
|
||||||
AC_CONFIG_FILES(benchmarks/Makefile)
|
AC_CONFIG_FILES(benchmarks/Makefile)
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|
||||||
|
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
echo "
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
Summary of configuration for $PACKAGE v$VERSION
|
Summary of configuration for $PACKAGE v$VERSION
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
The following features are enabled:
|
----- PLATFORM ----------------------------------------
|
||||||
|
architecture (build) : $build_cpu
|
||||||
- architecture (build) : $build_cpu
|
os (build) : $build_os
|
||||||
- os (build) : $build_os
|
architecture (target) : $target_cpu
|
||||||
- architecture (target) : $target_cpu
|
os (target) : $target_os
|
||||||
- os (target) : $target_os
|
compiler vendor : ${ax_cv_cxx_compiler_vendor}
|
||||||
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
compiler version : ${ax_cv_gxx_version}
|
||||||
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
----- BUILD OPTIONS -----------------------------------
|
||||||
- Supported SIMD flags : $SIMD_FLAGS
|
SIMD : ${ac_SIMD}
|
||||||
----------------------------------------------------------
|
Threading : ${ac_openmp}
|
||||||
- enabled simd support : ${ac_SIMD} (config macro says supported: $supported )
|
Communications type : ${comms_type}
|
||||||
- communications type : ${ac_COMMS}
|
Default precision : ${ac_PRECISION}
|
||||||
- default precision : ${ac_PRECISION}
|
RNG choice : ${ac_RNG}
|
||||||
- RNG choice : ${ac_RNG}
|
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
|
||||||
- LAPACK : ${ac_LAPACK}
|
LAPACK : ${ac_LAPACK}
|
||||||
|
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
|
||||||
|
build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
|
||||||
"
|
graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
|
||||||
|
----- BUILD FLAGS -------------------------------------
|
||||||
|
CXXFLAGS:
|
||||||
|
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
LDFLAGS:
|
||||||
|
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
LIBS:
|
||||||
|
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
|
||||||
|
-------------------------------------------------------" > config.summary
|
||||||
|
echo ""
|
||||||
|
cat config.summary
|
||||||
|
echo ""
|
||||||
|
1
include/Grid
Symbolic link
1
include/Grid
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../lib
|
@ -29,27 +29,28 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_ALGORITHMS_H
|
#ifndef GRID_ALGORITHMS_H
|
||||||
#define GRID_ALGORITHMS_H
|
#define GRID_ALGORITHMS_H
|
||||||
|
|
||||||
#include <algorithms/SparseMatrix.h>
|
#include <Grid/algorithms/SparseMatrix.h>
|
||||||
#include <algorithms/LinearOperator.h>
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
#include <algorithms/Preconditioner.h>
|
#include <Grid/algorithms/Preconditioner.h>
|
||||||
|
|
||||||
#include <algorithms/approx/Zolotarev.h>
|
#include <Grid/algorithms/approx/Zolotarev.h>
|
||||||
#include <algorithms/approx/Chebyshev.h>
|
#include <Grid/algorithms/approx/Chebyshev.h>
|
||||||
#include <algorithms/approx/Remez.h>
|
#include <Grid/algorithms/approx/Remez.h>
|
||||||
#include <algorithms/approx/MultiShiftFunction.h>
|
#include <Grid/algorithms/approx/MultiShiftFunction.h>
|
||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradient.h>
|
#include <Grid/algorithms/iterative/ConjugateGradient.h>
|
||||||
#include <algorithms/iterative/ConjugateResidual.h>
|
#include <Grid/algorithms/iterative/ConjugateResidual.h>
|
||||||
#include <algorithms/iterative/NormalEquations.h>
|
#include <Grid/algorithms/iterative/NormalEquations.h>
|
||||||
#include <algorithms/iterative/SchurRedBlack.h>
|
#include <Grid/algorithms/iterative/SchurRedBlack.h>
|
||||||
|
|
||||||
#include <algorithms/iterative/ConjugateGradientMultiShift.h>
|
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
|
||||||
|
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
|
||||||
|
|
||||||
// Lanczos support
|
// Lanczos support
|
||||||
#include <algorithms/iterative/MatrixUtils.h>
|
#include <Grid/algorithms/iterative/MatrixUtils.h>
|
||||||
#include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
|
||||||
|
|
||||||
#include <algorithms/CoarsenedMatrix.h>
|
#include <Grid/algorithms/CoarsenedMatrix.h>
|
||||||
|
|
||||||
// Eigen/lanczos
|
// Eigen/lanczos
|
||||||
// EigCg
|
// EigCg
|
||||||
|
@ -40,14 +40,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_COMMS_SHMEM
|
|
||||||
extern "C" {
|
|
||||||
#include <mpp/shmem.h>
|
|
||||||
extern void * shmem_align(size_t, size_t);
|
|
||||||
extern void shmem_free(void *);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
@ -65,28 +57,85 @@ public:
|
|||||||
typedef _Tp value_type;
|
typedef _Tp value_type;
|
||||||
|
|
||||||
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
|
||||||
|
|
||||||
alignedAllocator() throw() { }
|
alignedAllocator() throw() { }
|
||||||
|
|
||||||
alignedAllocator(const alignedAllocator&) throw() { }
|
alignedAllocator(const alignedAllocator&) throw() { }
|
||||||
|
|
||||||
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
|
||||||
|
|
||||||
~alignedAllocator() throw() { }
|
~alignedAllocator() throw() { }
|
||||||
|
|
||||||
pointer address(reference __x) const { return &__x; }
|
pointer address(reference __x) const { return &__x; }
|
||||||
// const_pointer address(const_reference __x) const { return &__x; }
|
|
||||||
|
|
||||||
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||||
|
|
||||||
pointer allocate(size_type __n, const void* _p= 0)
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
{
|
{
|
||||||
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
|
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
||||||
|
#else
|
||||||
|
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
_Tp tmp;
|
||||||
|
#ifdef GRID_NUMA
|
||||||
|
#pragma omp parallel for schedule(static)
|
||||||
|
for(int i=0;i<__n;i++){
|
||||||
|
ptr[i]=tmp;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void deallocate(pointer __p, size_type) {
|
||||||
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
|
_mm_free((void *)__p);
|
||||||
|
#else
|
||||||
|
free((void *)__p);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void construct(pointer __p, const _Tp& __val) { };
|
||||||
|
void construct(pointer __p) { };
|
||||||
|
void destroy(pointer __p) { };
|
||||||
|
};
|
||||||
|
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
||||||
|
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// MPI3 : comms must use shm region
|
||||||
|
// SHMEM: comms must use symmetric heap
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
extern "C" {
|
||||||
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
|
#include <mpp/shmem.h>
|
||||||
|
extern void * shmem_align(size_t, size_t);
|
||||||
|
extern void shmem_free(void *);
|
||||||
|
}
|
||||||
#define PARANOID_SYMMETRIC_HEAP
|
#define PARANOID_SYMMETRIC_HEAP
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename _Tp>
|
||||||
|
class commAllocator {
|
||||||
|
public:
|
||||||
|
typedef std::size_t size_type;
|
||||||
|
typedef std::ptrdiff_t difference_type;
|
||||||
|
typedef _Tp* pointer;
|
||||||
|
typedef const _Tp* const_pointer;
|
||||||
|
typedef _Tp& reference;
|
||||||
|
typedef const _Tp& const_reference;
|
||||||
|
typedef _Tp value_type;
|
||||||
|
|
||||||
|
template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
|
||||||
|
commAllocator() throw() { }
|
||||||
|
commAllocator(const commAllocator&) throw() { }
|
||||||
|
template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
|
||||||
|
~commAllocator() throw() { }
|
||||||
|
pointer address(reference __x) const { return &__x; }
|
||||||
|
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_SHMEM
|
||||||
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
|
{
|
||||||
|
#ifdef CRAY
|
||||||
|
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
|
||||||
|
#else
|
||||||
|
_Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
|
||||||
|
#endif
|
||||||
#ifdef PARANOID_SYMMETRIC_HEAP
|
#ifdef PARANOID_SYMMETRIC_HEAP
|
||||||
static void * bcast;
|
static void * bcast;
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
||||||
@ -96,55 +145,47 @@ public:
|
|||||||
|
|
||||||
if ( bcast != ptr ) {
|
if ( bcast != ptr ) {
|
||||||
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
||||||
BACKTRACEFILE();
|
// BACKTRACEFILE();
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert( bcast == (void *) ptr);
|
assert( bcast == (void *) ptr);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
void deallocate(pointer __p, size_type) {
|
||||||
|
shmem_free((void *)__p);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
|
pointer allocate(size_type __n, const void* _p= 0)
|
||||||
|
{
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
|
||||||
#else
|
#else
|
||||||
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
|
||||||
_Tp tmp;
|
|
||||||
#undef FIRST_TOUCH_OPTIMISE
|
|
||||||
#ifdef FIRST_TOUCH_OPTIMISE
|
|
||||||
#pragma omp parallel for
|
|
||||||
for(int i=0;i<__n;i++){
|
|
||||||
ptr[i]=tmp;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void deallocate(pointer __p, size_type) {
|
void deallocate(pointer __p, size_type) {
|
||||||
#ifdef GRID_COMMS_SHMEM
|
|
||||||
shmem_free((void *)__p);
|
|
||||||
#else
|
|
||||||
#ifdef HAVE_MM_MALLOC_H
|
#ifdef HAVE_MM_MALLOC_H
|
||||||
_mm_free((void *)__p);
|
_mm_free((void *)__p);
|
||||||
#else
|
#else
|
||||||
free((void *)__p);
|
free((void *)__p);
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
void construct(pointer __p, const _Tp& __val) { };
|
void construct(pointer __p, const _Tp& __val) { };
|
||||||
void construct(pointer __p) { };
|
void construct(pointer __p) { };
|
||||||
|
|
||||||
void destroy(pointer __p) { };
|
void destroy(pointer __p) { };
|
||||||
};
|
};
|
||||||
|
template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
|
||||||
|
template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
|
||||||
|
|
||||||
template<typename _Tp> inline bool
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
|
// Template typedefs
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template<typename _Tp> inline bool
|
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
|
||||||
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
|
template<class T> using commVector = std::vector<T,commAllocator<T> >;
|
||||||
|
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
|
||||||
|
|
||||||
}; // namespace Grid
|
}; // namespace Grid
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,8 +28,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_CARTESIAN_H
|
#ifndef GRID_CARTESIAN_H
|
||||||
#define GRID_CARTESIAN_H
|
#define GRID_CARTESIAN_H
|
||||||
|
|
||||||
#include <cartesian/Cartesian_base.h>
|
#include <Grid/cartesian/Cartesian_base.h>
|
||||||
#include <cartesian/Cartesian_full.h>
|
#include <Grid/cartesian/Cartesian_full.h>
|
||||||
#include <cartesian/Cartesian_red_black.h>
|
#include <Grid/cartesian/Cartesian_red_black.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_COMMUNICATOR_H
|
#ifndef GRID_COMMUNICATOR_H
|
||||||
#define GRID_COMMUNICATOR_H
|
#define GRID_COMMUNICATOR_H
|
||||||
|
|
||||||
#include <communicator/Communicator_base.h>
|
#include <Grid/communicator/Communicator_base.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
16
lib/Cshift.h
16
lib/Cshift.h
@ -28,17 +28,25 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef _GRID_CSHIFT_H_
|
#ifndef _GRID_CSHIFT_H_
|
||||||
#define _GRID_CSHIFT_H_
|
#define _GRID_CSHIFT_H_
|
||||||
|
|
||||||
#include <cshift/Cshift_common.h>
|
#include <Grid/cshift/Cshift_common.h>
|
||||||
|
|
||||||
#ifdef GRID_COMMS_NONE
|
#ifdef GRID_COMMS_NONE
|
||||||
#include <cshift/Cshift_none.h>
|
#include <Grid/cshift/Cshift_none.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
#include <cshift/Cshift_mpi.h>
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_COMMS_MPI3L
|
||||||
|
#include <Grid/cshift/Cshift_mpi.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
#include <cshift/Cshift_mpi.h> // uses same implementation of communicator
|
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
302
lib/FFT.h
Normal file
302
lib/FFT.h
Normal file
@ -0,0 +1,302 @@
|
|||||||
|
|
||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/FFT.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef _GRID_FFT_H_
|
||||||
|
#define _GRID_FFT_H_
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
#ifdef USE_MKL
|
||||||
|
#include <fftw/fftw3.h>
|
||||||
|
#else
|
||||||
|
#include <fftw3.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// Primary template: intentionally empty. The specialisations of FFTW<>
// declared below (only when HAVE_FFTW is defined) supply the FFTW_scalar /
// FFTW_plan typedefs and the static forwarding functions.
template<class scalar> struct FFTW { };
|
||||||
|
|
||||||
|
#ifdef HAVE_FFTW
|
||||||
|
template<> struct FFTW<ComplexD> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftw_complex FFTW_scalar;
|
||||||
|
typedef fftw_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftw_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftw_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<> struct FFTW<ComplexF> {
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef fftwf_complex FFTW_scalar;
|
||||||
|
typedef fftwf_plan FFTW_plan;
|
||||||
|
|
||||||
|
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
|
||||||
|
FFTW_scalar *in, const int *inembed,
|
||||||
|
int istride, int idist,
|
||||||
|
FFTW_scalar *out, const int *onembed,
|
||||||
|
int ostride, int odist,
|
||||||
|
int sign, unsigned flags) {
|
||||||
|
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
|
||||||
|
::fftwf_flops(p,add,mul,fmas);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
|
||||||
|
::fftwf_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
inline static void fftw_destroy_plan(const FFTW_plan p) {
|
||||||
|
::fftwf_destroy_plan(p);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef FFTW_FORWARD
|
||||||
|
#define FFTW_FORWARD (-1)
|
||||||
|
#define FFTW_BACKWARD (+1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
class FFT {
|
||||||
|
private:
|
||||||
|
|
||||||
|
GridCartesian *vgrid;
|
||||||
|
GridCartesian *sgrid;
|
||||||
|
|
||||||
|
int Nd;
|
||||||
|
double flops;
|
||||||
|
double flops_call;
|
||||||
|
uint64_t usec;
|
||||||
|
|
||||||
|
std::vector<int> dimensions;
|
||||||
|
std::vector<int> processors;
|
||||||
|
std::vector<int> processor_coor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
static const int forward=FFTW_FORWARD;
|
||||||
|
static const int backward=FFTW_BACKWARD;
|
||||||
|
|
||||||
|
double Flops(void) {return flops;}
|
||||||
|
double MFlops(void) {return flops/usec;}
|
||||||
|
double USec(void) {return (double)usec;}
|
||||||
|
|
||||||
|
FFT ( GridCartesian * grid ) :
|
||||||
|
vgrid(grid),
|
||||||
|
Nd(grid->_ndimension),
|
||||||
|
dimensions(grid->_fdimensions),
|
||||||
|
processors(grid->_processors),
|
||||||
|
processor_coor(grid->_processor_coor)
|
||||||
|
{
|
||||||
|
flops=0;
|
||||||
|
usec =0;
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
sgrid = new GridCartesian(dimensions,layout,processors);
|
||||||
|
};
|
||||||
|
|
||||||
|
~FFT ( void) {
|
||||||
|
delete sgrid;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_dim_mask(Lattice<vobj> &result,const Lattice<vobj> &source,std::vector<int> mask,int sign){
|
||||||
|
|
||||||
|
conformable(result._grid,vgrid);
|
||||||
|
conformable(source._grid,vgrid);
|
||||||
|
Lattice<vobj> tmp(vgrid);
|
||||||
|
tmp = source;
|
||||||
|
for(int d=0;d<Nd;d++){
|
||||||
|
if( mask[d] ) {
|
||||||
|
FFT_dim(result,tmp,d,sign);
|
||||||
|
tmp=result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_all_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int sign){
|
||||||
|
std::vector<int> mask(Nd,1);
|
||||||
|
FFT_dim_mask(result,source,mask,sign);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int sign){
|
||||||
|
#ifndef HAVE_FFTW
|
||||||
|
assert(0);
|
||||||
|
#else
|
||||||
|
conformable(result._grid,vgrid);
|
||||||
|
conformable(source._grid,vgrid);
|
||||||
|
|
||||||
|
int L = vgrid->_ldimensions[dim];
|
||||||
|
int G = vgrid->_fdimensions[dim];
|
||||||
|
|
||||||
|
std::vector<int> layout(Nd,1);
|
||||||
|
std::vector<int> pencil_gd(vgrid->_fdimensions);
|
||||||
|
|
||||||
|
pencil_gd[dim] = G*processors[dim];
|
||||||
|
|
||||||
|
// Pencil global vol LxLxGxLxL per node
|
||||||
|
GridCartesian pencil_g(pencil_gd,layout,processors);
|
||||||
|
|
||||||
|
// Construct pencils
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
typedef typename sobj::scalar_type scalar;
|
||||||
|
|
||||||
|
Lattice<sobj> pgbuf(&pencil_g);
|
||||||
|
|
||||||
|
|
||||||
|
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
|
||||||
|
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
|
||||||
|
|
||||||
|
int Ncomp = sizeof(sobj)/sizeof(scalar);
|
||||||
|
int Nlow = 1;
|
||||||
|
for(int d=0;d<dim;d++){
|
||||||
|
Nlow*=vgrid->_ldimensions[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
int rank = 1; /* 1d transforms */
|
||||||
|
int n[] = {G}; /* 1d transforms of length G */
|
||||||
|
int howmany = Ncomp;
|
||||||
|
int odist,idist,istride,ostride;
|
||||||
|
idist = odist = 1; /* Distance between consecutive FT's */
|
||||||
|
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
|
||||||
|
int *inembed = n, *onembed = n;
|
||||||
|
|
||||||
|
scalar div;
|
||||||
|
if ( sign == backward ) div = 1.0/G;
|
||||||
|
else if ( sign == forward ) div = 1.0;
|
||||||
|
else assert(0);
|
||||||
|
|
||||||
|
FFTW_plan p;
|
||||||
|
{
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[0];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[0];
|
||||||
|
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
|
||||||
|
in,inembed,
|
||||||
|
istride,idist,
|
||||||
|
out,onembed,
|
||||||
|
ostride, odist,
|
||||||
|
sign,FFTW_ESTIMATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Barrel shift and collect global pencil
|
||||||
|
std::vector<int> lcoor(Nd), gcoor(Nd);
|
||||||
|
result = source;
|
||||||
|
for(int p=0;p<processors[dim];p++) {
|
||||||
|
PARALLEL_REGION
|
||||||
|
{
|
||||||
|
std::vector<int> cbuf(Nd);
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,cbuf);
|
||||||
|
peekLocalSite(s,result,cbuf);
|
||||||
|
cbuf[dim]+=p*L;
|
||||||
|
pokeLocalSite(s,pgbuf,cbuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result = Cshift(result,dim,L);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop over orthog coords
|
||||||
|
int NN=pencil_g.lSites();
|
||||||
|
GridStopWatch timer;
|
||||||
|
timer.Start();
|
||||||
|
PARALLEL_REGION
|
||||||
|
{
|
||||||
|
std::vector<int> cbuf(Nd);
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
|
for(int idx=0;idx<NN;idx++) {
|
||||||
|
pencil_g.LocalIndexToLocalCoor(idx, cbuf);
|
||||||
|
if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
|
||||||
|
FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
|
FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[idx];
|
||||||
|
FFTW<scalar>::fftw_execute_dft(p,in,out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
timer.Stop();
|
||||||
|
|
||||||
|
// performance counting
|
||||||
|
double add,mul,fma;
|
||||||
|
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
|
||||||
|
flops_call = add+mul+2.0*fma;
|
||||||
|
usec += timer.useconds();
|
||||||
|
flops+= flops_call*NN;
|
||||||
|
|
||||||
|
// writing out result
|
||||||
|
int pc = processor_coor[dim];
|
||||||
|
PARALLEL_REGION
|
||||||
|
{
|
||||||
|
std::vector<int> clbuf(Nd), cgbuf(Nd);
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP_INTERN
|
||||||
|
for(int idx=0;idx<sgrid->lSites();idx++) {
|
||||||
|
sgrid->LocalIndexToLocalCoor(idx,clbuf);
|
||||||
|
cgbuf = clbuf;
|
||||||
|
cgbuf[dim] = clbuf[dim]+L*pc;
|
||||||
|
peekLocalSite(s,pgbuf,cgbuf);
|
||||||
|
s = s * div;
|
||||||
|
pokeLocalSite(s,result,clbuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// destroying plan
|
||||||
|
FFTW<scalar>::fftw_destroy_plan(p);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
45
lib/Grid.h
45
lib/Grid.h
@ -59,29 +59,30 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
///////////////////
|
///////////////////
|
||||||
// Grid headers
|
// Grid headers
|
||||||
///////////////////
|
///////////////////
|
||||||
#include <serialisation/Serialisation.h>
|
#include <Grid/serialisation/Serialisation.h>
|
||||||
#include <Config.h>
|
#include "Config.h"
|
||||||
#include <Timer.h>
|
#include <Grid/Timer.h>
|
||||||
#include <PerfCount.h>
|
#include <Grid/PerfCount.h>
|
||||||
#include <Log.h>
|
#include <Grid/Log.h>
|
||||||
#include <AlignedAllocator.h>
|
#include <Grid/AlignedAllocator.h>
|
||||||
#include <Simd.h>
|
#include <Grid/Simd.h>
|
||||||
#include <Threads.h>
|
#include <Grid/Threads.h>
|
||||||
#include <Lexicographic.h>
|
#include <Grid/Lexicographic.h>
|
||||||
#include <Communicator.h>
|
#include <Grid/Init.h>
|
||||||
#include <Cartesian.h>
|
#include <Grid/Communicator.h>
|
||||||
#include <Tensors.h>
|
#include <Grid/Cartesian.h>
|
||||||
#include <Lattice.h>
|
#include <Grid/Tensors.h>
|
||||||
#include <Cshift.h>
|
#include <Grid/Lattice.h>
|
||||||
#include <Stencil.h>
|
#include <Grid/Cshift.h>
|
||||||
#include <Algorithms.h>
|
#include <Grid/Stencil.h>
|
||||||
#include <parallelIO/BinaryIO.h>
|
#include <Grid/Algorithms.h>
|
||||||
#include <qcd/QCD.h>
|
#include <Grid/parallelIO/BinaryIO.h>
|
||||||
#include <parallelIO/NerscIO.h>
|
#include <Grid/FFT.h>
|
||||||
#include <Init.h>
|
|
||||||
|
|
||||||
#include <qcd/hmc/NerscCheckpointer.h>
|
#include <Grid/qcd/QCD.h>
|
||||||
#include <qcd/hmc/HmcRunner.h>
|
#include <Grid/parallelIO/NerscIO.h>
|
||||||
|
#include <Grid/qcd/hmc/NerscCheckpointer.h>
|
||||||
|
#include <Grid/qcd/hmc/HmcRunner.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
238
lib/Init.cc
238
lib/Init.cc
@ -44,9 +44,33 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
|
||||||
|
#include <fenv.h>
|
||||||
|
#ifdef __APPLE__
|
||||||
|
static int
|
||||||
|
feenableexcept (unsigned int excepts)
|
||||||
|
{
|
||||||
|
static fenv_t fenv;
|
||||||
|
unsigned int new_excepts = excepts & FE_ALL_EXCEPT,
|
||||||
|
old_excepts; // previous masks
|
||||||
|
|
||||||
|
if ( fegetenv (&fenv) ) return -1;
|
||||||
|
old_excepts = fenv.__control & FE_ALL_EXCEPT;
|
||||||
|
|
||||||
|
// unmask
|
||||||
|
fenv.__control &= ~new_excepts;
|
||||||
|
fenv.__mxcsr &= ~(new_excepts << 7);
|
||||||
|
|
||||||
|
return ( fesetenv (&fenv) ? -1 : old_excepts );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Convenience functions to access standard command line arg
|
// Convenience functions to access standard command line arg
|
||||||
// driven parallelism controls
|
// driven parallelism controls
|
||||||
@ -123,6 +147,13 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read a single integer from the start of str into val using formatted
// stream extraction (leading whitespace skipped, trailing text ignored).
void GridCmdOptionInt(std::string &str,int & val)
{
  std::istringstream parser(str);
  parser >> val;
}
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
std::vector<int> &latt,
|
std::vector<int> &latt,
|
||||||
@ -154,12 +185,11 @@ void GridParseLayout(char **argv,int argc,
|
|||||||
GridThread::SetThreads(ompthreads[0]);
|
GridThread::SetThreads(ompthreads[0]);
|
||||||
}
|
}
|
||||||
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
|
||||||
std::vector<int> cores(0);
|
int cores;
|
||||||
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
|
||||||
GridCmdOptionIntVector(arg,cores);
|
GridCmdOptionInt(arg,cores);
|
||||||
GridThread::SetCores(cores[0]);
|
GridThread::SetCores(cores);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
||||||
@ -168,33 +198,40 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
|||||||
return oss.str();
|
return oss.str();
|
||||||
}
|
}
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
//
|
// Reinit guard
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
|
static int Grid_is_initialised = 0;
|
||||||
|
|
||||||
|
|
||||||
void Grid_init(int *argc,char ***argv)
|
void Grid_init(int *argc,char ***argv)
|
||||||
{
|
{
|
||||||
CartesianCommunicator::Init(argc,argv);
|
|
||||||
|
|
||||||
// Parse command line args.
|
|
||||||
|
|
||||||
GridLogger::StopWatch.Start();
|
GridLogger::StopWatch.Start();
|
||||||
|
|
||||||
std::string arg;
|
std::string arg;
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Shared memory block size
|
||||||
|
////////////////////////////////////
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){
|
||||||
|
int MB;
|
||||||
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm");
|
||||||
|
GridCmdOptionInt(arg,MB);
|
||||||
|
CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
CartesianCommunicator::Init(argc,argv);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Logging
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
std::vector<std::string> logstreams;
|
std::vector<std::string> logstreams;
|
||||||
std::string defaultLog("Error,Warning,Message,Performance");
|
std::string defaultLog("Error,Warning,Message,Performance");
|
||||||
|
|
||||||
GridCmdOptionCSL(defaultLog,logstreams);
|
GridCmdOptionCSL(defaultLog,logstreams);
|
||||||
GridLogConfigure(logstreams);
|
GridLogConfigure(logstreams);
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
|
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
||||||
std::cout<<GridLogMessage<<"--help : this message"<<std::endl;
|
Grid_quiesce_nodes();
|
||||||
std::cout<<GridLogMessage<<"--debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--debug-stdout : print stdout from EVERY node"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--threads n : default number of OMP threads"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl;
|
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
|
||||||
@ -203,57 +240,67 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
GridLogConfigure(logstreams);
|
GridLogConfigure(logstreams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Help message
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){
|
||||||
Grid_debug_handler_init();
|
std::cout<<GridLogMessage<<" --help : this message"<<std::endl;
|
||||||
}
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
|
std::cout<<GridLogMessage<<"Geometry:"<<std::endl;
|
||||||
Grid_quiesce_nodes();
|
std::cout<<GridLogMessage<<" --mpi n.n.n.n : default MPI decomposition"<<std::endl;
|
||||||
}
|
std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-opt") ){
|
std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
|
||||||
QCD::WilsonKernelsStatic::HandOpt=1;
|
std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
|
||||||
}
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
|
||||||
LebesgueOrder::UseLebesgueOrder=1;
|
std::cout<<GridLogMessage<<" --log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --debug-signals : catch sigsegv and print a blame report"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --debug-stdout : print stdout from EVERY node"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --notimestamp : suppress millisecond resolution stamps"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"Performance:"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --dslash-generic: Wilson kernel for generic Nc"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --dslash-asm : Wilson kernel for AVX512"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --lebesgue : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<" --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<std::endl;
|
||||||
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
////////////////////////////////////
|
||||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
// Banner
|
||||||
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
////////////////////////////////////
|
||||||
}
|
|
||||||
GridParseLayout(*argv,*argc,
|
|
||||||
Grid_default_latt,
|
|
||||||
Grid_default_mpi);
|
|
||||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
|
||||||
std::cout<<GridLogMessage<<"Grid Decomposition\n";
|
|
||||||
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
std::string COL_RED = GridLogColours.colour["RED"];
|
||||||
|
std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
|
||||||
|
std::string COL_BLACK = GridLogColours.colour["BLACK"];
|
||||||
|
std::string COL_GREEN = GridLogColours.colour["GREEN"];
|
||||||
|
std::string COL_BLUE = GridLogColours.colour["BLUE"];
|
||||||
|
std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
|
||||||
|
std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
|
||||||
|
|
||||||
std::cout <<std::endl;
|
std::cout <<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__|__|__|__"<< "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__|__|__|__"<< "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__| | | "<< "| | | "<<Logger::PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ | | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|__ "<< " "<<Logger::PURPLE<<" "<< " _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<" GGGG "<<Logger::RED<<" RRRR "<<Logger::BLUE <<" III "<<Logger::PURPLE<<"DDDD "<<Logger::PURPLE<<" _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<"G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D "<<Logger::PURPLE<<" _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<"G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D"<<Logger::PURPLE<<" _|__"<<std::endl;
|
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<"G GG "<<Logger::RED<<" RRRR "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D"<<Logger::GREEN <<" _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<"G G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D "<<Logger::GREEN <<" _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<" GGGG "<<Logger::RED<<" R R "<<Logger::BLUE <<" III "<<Logger::PURPLE<<"DDDD "<<Logger::GREEN <<" _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|__ "<< " "<<Logger::GREEN <<" "<< " _|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|_ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|__|__|__|__"<< "|__|__|_"<<Logger::GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << "__|__|__|__|__"<< "|__|__|_"<<Logger::GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
|
||||||
std::cout <<Logger::BLUE << " | | | | "<< "| | | "<<Logger::GREEN <<" | | |"<< " | | | | "<<std::endl;
|
std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl;
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout <<Logger::YELLOW<< std::endl;
|
std::cout <<COL_YELLOW<< std::endl;
|
||||||
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
|
||||||
std::cout << "Colours by Tadahito Boyle "<<std::endl;
|
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
|
||||||
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
|
||||||
@ -264,13 +311,65 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
|
||||||
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
|
||||||
std::cout << "GNU General Public License for more details."<<std::endl;
|
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||||
std::cout << Logger::BLACK <<std::endl;
|
std::cout << COL_BACKGROUND <<std::endl;
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Debug and performance options
|
||||||
|
////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
|
||||||
|
Grid_debug_handler_init();
|
||||||
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
|
||||||
|
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
|
||||||
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
|
||||||
|
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
|
||||||
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
|
||||||
|
QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
|
||||||
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||||
|
LebesgueOrder::UseLebesgueOrder=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||||
|
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||||
|
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
||||||
|
}
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
|
||||||
|
GridLogTimestamp(0);
|
||||||
|
} else {
|
||||||
|
GridLogTimestamp(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
GridParseLayout(*argv,*argc,
|
||||||
|
Grid_default_latt,
|
||||||
|
Grid_default_mpi);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
|
||||||
|
|
||||||
|
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
|
||||||
|
std::cout<<GridLogMessage<<"Grid Decomposition\n";
|
||||||
|
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"\tMPI tasks : "<<GridCmdVectorIntToString(GridDefaultMpi())<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"\tvRealF : "<<sizeof(vRealF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealF::Nsimd()))<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"\tvRealD : "<<sizeof(vRealD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vRealD::Nsimd()))<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"\tvComplexF : "<<sizeof(vComplexF)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexF::Nsimd()))<<std::endl;
|
||||||
|
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Grid_is_initialised = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Grid_finalize(void)
|
void Grid_finalize(void)
|
||||||
{
|
{
|
||||||
#ifdef GRID_COMMS_MPI
|
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
Grid_unquiesce_nodes();
|
Grid_unquiesce_nodes();
|
||||||
#endif
|
#endif
|
||||||
@ -317,10 +416,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
|
|||||||
exit(0);
|
exit(0);
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
#ifdef GRID_FPE
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
#include <fenv.h>
|
|
||||||
#endif
|
|
||||||
void Grid_debug_handler_init(void)
|
void Grid_debug_handler_init(void)
|
||||||
{
|
{
|
||||||
struct sigaction sa,osa;
|
struct sigaction sa,osa;
|
||||||
@ -329,9 +425,9 @@ void Grid_debug_handler_init(void)
|
|||||||
sa.sa_flags = SA_SIGINFO;
|
sa.sa_flags = SA_SIGINFO;
|
||||||
sigaction(SIGSEGV,&sa,NULL);
|
sigaction(SIGSEGV,&sa,NULL);
|
||||||
sigaction(SIGTRAP,&sa,NULL);
|
sigaction(SIGTRAP,&sa,NULL);
|
||||||
#ifdef GRID_FPE
|
|
||||||
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
|
||||||
|
|
||||||
sigaction(SIGFPE,&sa,NULL);
|
sigaction(SIGFPE,&sa,NULL);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,7 @@ namespace Grid {
|
|||||||
|
|
||||||
void Grid_init(int *argc,char ***argv);
|
void Grid_init(int *argc,char ***argv);
|
||||||
void Grid_finalize(void);
|
void Grid_finalize(void);
|
||||||
|
|
||||||
// internal, controled with --handle
|
// internal, controled with --handle
|
||||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
||||||
void Grid_debug_handler_init(void);
|
void Grid_debug_handler_init(void);
|
||||||
@ -44,6 +45,7 @@ namespace Grid {
|
|||||||
const std::vector<int> &GridDefaultMpi(void);
|
const std::vector<int> &GridDefaultMpi(void);
|
||||||
const int &GridThreads(void) ;
|
const int &GridThreads(void) ;
|
||||||
void GridSetThreads(int t) ;
|
void GridSetThreads(int t) ;
|
||||||
|
void GridLogTimestamp(int);
|
||||||
|
|
||||||
// Common parsing chores
|
// Common parsing chores
|
||||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
||||||
@ -52,6 +54,7 @@ namespace Grid {
|
|||||||
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
|
void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
|
||||||
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec);
|
void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec);
|
||||||
|
|
||||||
|
|
||||||
void GridParseLayout(char **argv,int argc,
|
void GridParseLayout(char **argv,int argc,
|
||||||
std::vector<int> &latt,
|
std::vector<int> &latt,
|
||||||
std::vector<int> &simd,
|
std::vector<int> &simd,
|
||||||
|
@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_LATTICE_H
|
#ifndef GRID_LATTICE_H
|
||||||
#define GRID_LATTICE_H
|
#define GRID_LATTICE_H
|
||||||
|
|
||||||
#include <lattice/Lattice_base.h>
|
#include <Grid/lattice/Lattice_base.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
150
lib/Log.cc
150
lib/Log.cc
@ -1,126 +1,112 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/Log.cc
|
Source file: ./lib/Log.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
#include <cxxabi.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
std::string demangle(const char* name) {
|
||||||
|
|
||||||
|
int status = -4; // some arbitrary value to eliminate the compiler warning
|
||||||
|
|
||||||
|
// enable c++11 by passing the flag -std=c++11 to g++
|
||||||
|
std::unique_ptr<char, void(*)(void*)> res {
|
||||||
|
abi::__cxa_demangle(name, NULL, NULL, &status),
|
||||||
|
std::free
|
||||||
|
};
|
||||||
|
|
||||||
|
return (status==0) ? res.get() : name ;
|
||||||
|
}
|
||||||
|
|
||||||
GridStopWatch Logger::StopWatch;
|
GridStopWatch Logger::StopWatch;
|
||||||
std::ostream Logger::devnull(0);
|
int Logger::timestamp;
|
||||||
std::string Logger::BLACK("\033[30m");
|
std::ostream Logger::devnull(0);
|
||||||
std::string Logger::RED("\033[31m");
|
|
||||||
std::string Logger::GREEN("\033[32m");
|
|
||||||
std::string Logger::YELLOW("\033[33m");
|
|
||||||
std::string Logger::BLUE("\033[34m");
|
|
||||||
std::string Logger::PURPLE("\033[35m");
|
|
||||||
std::string Logger::CYAN("\033[36m");
|
|
||||||
std::string Logger::WHITE("\033[37m");
|
|
||||||
std::string Logger::NORMAL("\033[0;39m");
|
|
||||||
std::string EMPTY("");
|
|
||||||
|
|
||||||
#if 0
|
void GridLogTimestamp(int on){
|
||||||
GridLogger GridLogError (1,"Error",Logger::RED);
|
Logger::Timestamp(on);
|
||||||
GridLogger GridLogWarning (1,"Warning",Logger::YELLOW);
|
}
|
||||||
GridLogger GridLogMessage (1,"Message",Logger::BLACK);
|
|
||||||
GridLogger GridLogDebug (1,"Debug",Logger::PURPLE);
|
|
||||||
GridLogger GridLogPerformance(1,"Performance",Logger::GREEN);
|
|
||||||
GridLogger GridLogIterative (1,"Iterative",Logger::BLUE);
|
|
||||||
GridLogger GridLogIntegrator (1,"Integrator",Logger::BLUE);
|
|
||||||
#else
|
|
||||||
GridLogger GridLogError (1,"Error",EMPTY);
|
|
||||||
GridLogger GridLogWarning (1,"Warning",EMPTY);
|
|
||||||
GridLogger GridLogMessage (1,"Message",EMPTY);
|
|
||||||
GridLogger GridLogDebug (1,"Debug",EMPTY);
|
|
||||||
GridLogger GridLogPerformance(1,"Performance",EMPTY);
|
|
||||||
GridLogger GridLogIterative (1,"Iterative",EMPTY);
|
|
||||||
GridLogger GridLogIntegrator (1,"Integrator",EMPTY);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void GridLogConfigure(std::vector<std::string> &logstreams)
|
Colours GridLogColours(0);
|
||||||
{
|
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
|
||||||
|
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
|
||||||
|
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
|
||||||
|
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE");
|
||||||
|
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||||
|
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE");
|
||||||
|
GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE");
|
||||||
|
|
||||||
|
void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||||
GridLogError.Active(0);
|
GridLogError.Active(0);
|
||||||
GridLogWarning.Active(0);
|
GridLogWarning.Active(0);
|
||||||
GridLogMessage.Active(0);
|
GridLogMessage.Active(1); // at least the messages should be always on
|
||||||
GridLogIterative.Active(0);
|
GridLogIterative.Active(0);
|
||||||
GridLogDebug.Active(0);
|
GridLogDebug.Active(0);
|
||||||
GridLogPerformance.Active(0);
|
GridLogPerformance.Active(0);
|
||||||
GridLogIntegrator.Active(0);
|
GridLogIntegrator.Active(0);
|
||||||
|
GridLogColours.Active(0);
|
||||||
|
|
||||||
int blackAndWhite = 1;
|
for (int i = 0; i < logstreams.size(); i++) {
|
||||||
if(blackAndWhite){
|
if (logstreams[i] == std::string("Error")) GridLogError.Active(1);
|
||||||
Logger::BLACK = std::string("");
|
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
|
||||||
Logger::RED =Logger::BLACK;
|
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
|
||||||
Logger::GREEN =Logger::BLACK;
|
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
||||||
Logger::YELLOW =Logger::BLACK;
|
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||||
Logger::BLUE =Logger::BLACK;
|
if (logstreams[i] == std::string("Performance"))
|
||||||
Logger::PURPLE =Logger::BLACK;
|
GridLogPerformance.Active(1);
|
||||||
Logger::CYAN =Logger::BLACK;
|
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1);
|
||||||
Logger::WHITE =Logger::BLACK;
|
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||||
Logger::NORMAL =Logger::BLACK;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i=0;i<logstreams.size();i++){
|
|
||||||
if ( logstreams[i]== std::string("Error") ) GridLogError.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Warning") ) GridLogWarning.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Message") ) GridLogMessage.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Iterative") ) GridLogIterative.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Debug") ) GridLogDebug.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
|
|
||||||
if ( logstreams[i]== std::string("Integrator" ) ) GridLogIntegrator.Active(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// Verbose limiter on MPI tasks
|
// Verbose limiter on MPI tasks
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
void Grid_quiesce_nodes(void)
|
void Grid_quiesce_nodes(void) {
|
||||||
{
|
int me = 0;
|
||||||
int me=0;
|
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
|
||||||
#ifdef GRID_COMMS_MPI
|
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD,&me);
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
me = shmem_my_pe();
|
me = shmem_my_pe();
|
||||||
#endif
|
#endif
|
||||||
if ( me ) {
|
if (me) {
|
||||||
std::cout.setstate(std::ios::badbit);
|
std::cout.setstate(std::ios::badbit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Grid_unquiesce_nodes(void)
|
void Grid_unquiesce_nodes(void) {
|
||||||
{
|
|
||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
std::cout.clear();
|
std::cout.clear();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
160
lib/Log.h
160
lib/Log.h
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Antonin Portelli <antonin.portelli@me.com>
|
Author: Antonin Portelli <antonin.portelli@me.com>
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -27,6 +27,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#ifndef GRID_LOG_H
|
#ifndef GRID_LOG_H
|
||||||
#define GRID_LOG_H
|
#define GRID_LOG_H
|
||||||
|
|
||||||
@ -36,54 +39,98 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||||
int Rank(void); // used for early stage debug before library init
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
class Colours{
|
||||||
|
protected:
|
||||||
|
bool is_active;
|
||||||
|
public:
|
||||||
|
std::map<std::string, std::string> colour;
|
||||||
|
|
||||||
|
Colours(bool activate=false){
|
||||||
|
Active(activate);
|
||||||
|
};
|
||||||
|
|
||||||
|
void Active(bool activate){
|
||||||
|
is_active=activate;
|
||||||
|
if (is_active){
|
||||||
|
colour["BLACK"] ="\033[30m";
|
||||||
|
colour["RED"] ="\033[31m";
|
||||||
|
colour["GREEN"] ="\033[32m";
|
||||||
|
colour["YELLOW"] ="\033[33m";
|
||||||
|
colour["BLUE"] ="\033[34m";
|
||||||
|
colour["PURPLE"] ="\033[35m";
|
||||||
|
colour["CYAN"] ="\033[36m";
|
||||||
|
colour["WHITE"] ="\033[37m";
|
||||||
|
colour["NORMAL"] ="\033[0;39m";
|
||||||
|
} else {
|
||||||
|
colour["BLACK"] ="";
|
||||||
|
colour["RED"] ="";
|
||||||
|
colour["GREEN"] ="";
|
||||||
|
colour["YELLOW"]="";
|
||||||
|
colour["BLUE"] ="";
|
||||||
|
colour["PURPLE"]="";
|
||||||
|
colour["CYAN"] ="";
|
||||||
|
colour["WHITE"] ="";
|
||||||
|
colour["NORMAL"]="";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class Logger {
|
class Logger {
|
||||||
protected:
|
protected:
|
||||||
int active;
|
Colours &Painter;
|
||||||
std::string name, topName, COLOUR;
|
int active;
|
||||||
public:
|
static int timestamp;
|
||||||
static GridStopWatch StopWatch;
|
std::string name, topName;
|
||||||
static std::ostream devnull;
|
std::string COLOUR;
|
||||||
|
|
||||||
static std::string BLACK;
|
public:
|
||||||
static std::string RED ;
|
static GridStopWatch StopWatch;
|
||||||
static std::string GREEN;
|
static std::ostream devnull;
|
||||||
static std::string YELLOW;
|
|
||||||
static std::string BLUE ;
|
std::string background() {return Painter.colour["NORMAL"];}
|
||||||
static std::string PURPLE;
|
std::string evidence() {return Painter.colour["YELLOW"];}
|
||||||
static std::string CYAN ;
|
std::string colour() {return Painter.colour[COLOUR];}
|
||||||
static std::string WHITE ;
|
|
||||||
static std::string NORMAL;
|
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on),
|
||||||
|
name(nm),
|
||||||
Logger(std::string topNm, int on, std::string nm,std::string col)
|
topName(topNm),
|
||||||
: active(on), name(nm), topName(topNm), COLOUR(col) {};
|
Painter(col_class),
|
||||||
|
COLOUR(col) {} ;
|
||||||
void Active(int on) {active = on;};
|
|
||||||
int isActive(void) {return active;};
|
void Active(int on) {active = on;};
|
||||||
|
int isActive(void) {return active;};
|
||||||
friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
|
static void Timestamp(int on) {timestamp = on;};
|
||||||
if ( log.active ) {
|
|
||||||
StopWatch.Stop();
|
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||||
GridTime now = StopWatch.Elapsed();
|
|
||||||
StopWatch.Start();
|
if ( log.active ) {
|
||||||
stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : ";
|
stream << log.background()<< log.topName << log.background()<< " : ";
|
||||||
stream << log.COLOUR <<std::setw(11) << log.name << BLACK << " : ";
|
stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
|
||||||
stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ;
|
if ( log.timestamp ) {
|
||||||
stream << log.COLOUR;
|
StopWatch.Stop();
|
||||||
return stream;
|
GridTime now = StopWatch.Elapsed();
|
||||||
} else {
|
StopWatch.Start();
|
||||||
return devnull;
|
stream << log.evidence()<< now << log.background() << " : " ;
|
||||||
}
|
}
|
||||||
|
stream << log.colour();
|
||||||
|
return stream;
|
||||||
|
} else {
|
||||||
|
return devnull;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class GridLogger: public Logger {
|
class GridLogger: public Logger {
|
||||||
public:
|
public:
|
||||||
GridLogger(int on, std::string nm, std::string col = Logger::BLACK): Logger("Grid", on, nm, col){};
|
GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"):
|
||||||
|
Logger("Grid", on, nm, col_class, col_key){};
|
||||||
};
|
};
|
||||||
|
|
||||||
void GridLogConfigure(std::vector<std::string> &logstreams);
|
void GridLogConfigure(std::vector<std::string> &logstreams);
|
||||||
@ -95,38 +142,41 @@ extern GridLogger GridLogDebug ;
|
|||||||
extern GridLogger GridLogPerformance;
|
extern GridLogger GridLogPerformance;
|
||||||
extern GridLogger GridLogIterative ;
|
extern GridLogger GridLogIterative ;
|
||||||
extern GridLogger GridLogIntegrator ;
|
extern GridLogger GridLogIntegrator ;
|
||||||
|
extern Colours GridLogColours;
|
||||||
|
|
||||||
|
std::string demangle(const char* name) ;
|
||||||
|
|
||||||
#define _NBACKTRACE (256)
|
#define _NBACKTRACE (256)
|
||||||
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||||
|
|
||||||
#define BACKTRACEFILE() {\
|
#define BACKTRACEFILE() {\
|
||||||
char string[20]; \
|
char string[20]; \
|
||||||
std::sprintf(string,"backtrace.%d",Rank()); \
|
std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \
|
||||||
std::FILE * fp = std::fopen(string,"w"); \
|
std::FILE * fp = std::fopen(string,"w"); \
|
||||||
BACKTRACEFP(fp)\
|
BACKTRACEFP(fp)\
|
||||||
std::fclose(fp); \
|
std::fclose(fp); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_EXECINFO_H
|
#ifdef HAVE_EXECINFO_H
|
||||||
#define BACKTRACEFP(fp) { \
|
#define BACKTRACEFP(fp) { \
|
||||||
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
|
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
|
||||||
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
|
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
|
||||||
for (int i = 0; i < symbols; i++){\
|
for (int i = 0; i < symbols; i++){\
|
||||||
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
|
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, demangle(strings[i]).c_str()); std::fflush(fp); \
|
||||||
}\
|
}\
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define BACKTRACEFP(fp) { \
|
#define BACKTRACEFP(fp) { \
|
||||||
std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
|
||||||
std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
|
||||||
std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
|
||||||
std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
|
std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define BACKTRACE() BACKTRACEFP(stdout)
|
#define BACKTRACE() BACKTRACEFP(stdout)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because one or more lines are too long
@ -1,32 +1,37 @@
|
|||||||
# additional include paths necessary to compile the C++ library
|
|
||||||
AM_CXXFLAGS = -I$(top_srcdir)/
|
|
||||||
|
|
||||||
extra_sources=
|
extra_sources=
|
||||||
if BUILD_COMMS_MPI
|
if BUILD_COMMS_MPI
|
||||||
extra_sources+=communicator/Communicator_mpi.cc
|
extra_sources+=communicator/Communicator_mpi.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
|
endif
|
||||||
|
|
||||||
|
if BUILD_COMMS_MPI3
|
||||||
|
extra_sources+=communicator/Communicator_mpi3.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
|
endif
|
||||||
|
|
||||||
|
if BUILD_COMMS_MPI3L
|
||||||
|
extra_sources+=communicator/Communicator_mpi3_leader.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if BUILD_COMMS_SHMEM
|
if BUILD_COMMS_SHMEM
|
||||||
extra_sources+=communicator/Communicator_shmem.cc
|
extra_sources+=communicator/Communicator_shmem.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if BUILD_COMMS_NONE
|
if BUILD_COMMS_NONE
|
||||||
extra_sources+=communicator/Communicator_none.cc
|
extra_sources+=communicator/Communicator_none.cc
|
||||||
|
extra_sources+=communicator/Communicator_base.cc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# Libraries
|
# Libraries
|
||||||
#
|
#
|
||||||
|
|
||||||
include Make.inc
|
include Make.inc
|
||||||
|
include Eigen.inc
|
||||||
|
|
||||||
lib_LIBRARIES = libGrid.a
|
lib_LIBRARIES = libGrid.a
|
||||||
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
|
||||||
|
|
||||||
|
|
||||||
# qcd/action/fermion/PartialFractionFermion5D.cc\ \
|
|
||||||
#
|
|
||||||
# Include files
|
|
||||||
#
|
|
||||||
nobase_include_HEADERS=$(HFILES)
|
|
||||||
|
|
||||||
|
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
|
||||||
|
libGrid_adir = $(pkgincludedir)
|
||||||
|
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
|
||||||
|
@ -43,6 +43,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#else
|
#else
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef __x86_64__
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -86,7 +89,6 @@ inline uint64_t cyclecount(void){
|
|||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
#elif defined __x86_64__
|
#elif defined __x86_64__
|
||||||
#include <x86intrin.h>
|
|
||||||
inline uint64_t cyclecount(void){
|
inline uint64_t cyclecount(void){
|
||||||
return __rdtsc();
|
return __rdtsc();
|
||||||
// unsigned int dummy;
|
// unsigned int dummy;
|
||||||
|
61
lib/Simd.h
61
lib/Simd.h
@ -1,32 +1,33 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/Simd.h
|
Source file: ./lib/Simd.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_SIMD_H
|
#ifndef GRID_SIMD_H
|
||||||
#define GRID_SIMD_H
|
#define GRID_SIMD_H
|
||||||
|
|
||||||
@ -118,6 +119,14 @@ namespace Grid {
|
|||||||
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
|
||||||
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
|
||||||
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
|
||||||
|
|
||||||
|
// define projections to real and imaginay parts
|
||||||
|
inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
|
||||||
|
inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
|
||||||
|
inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
|
||||||
|
inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
|
||||||
|
|
||||||
|
// define auxiliary functions for complex computations
|
||||||
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
|
||||||
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
|
||||||
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
|
||||||
@ -163,8 +172,8 @@ namespace Grid {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#include <simd/Grid_vector_types.h>
|
#include "simd/Grid_vector_types.h"
|
||||||
#include <simd/Grid_vector_unops.h>
|
#include "simd/Grid_vector_unops.h"
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
// Default precision
|
// Default precision
|
||||||
@ -228,6 +237,18 @@ namespace Grid {
|
|||||||
stream<<">";
|
stream<<">";
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
inline std::ostream& operator<< (std::ostream& stream, const vInteger &o){
|
||||||
|
int nn=vInteger::Nsimd();
|
||||||
|
std::vector<Integer,alignedAllocator<Integer> > buf(nn);
|
||||||
|
vstore(o,&buf[0]);
|
||||||
|
stream<<"<";
|
||||||
|
for(int i=0;i<nn;i++){
|
||||||
|
stream<<buf[i];
|
||||||
|
if(i<nn-1) stream<<",";
|
||||||
|
}
|
||||||
|
stream<<">";
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
247
lib/Stat.cc
Normal file
247
lib/Stat.cc
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
#include <Grid.h>
|
||||||
|
#include <PerfCount.h>
|
||||||
|
#include <Stat.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
|
||||||
|
bool PmuStat::pmu_initialized=false;
|
||||||
|
|
||||||
|
|
||||||
|
void PmuStat::init(const char *regname)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
name = regname;
|
||||||
|
if (!pmu_initialized)
|
||||||
|
{
|
||||||
|
std::cout<<"initialising pmu"<<std::endl;
|
||||||
|
pmu_initialized = true;
|
||||||
|
pmu_init();
|
||||||
|
}
|
||||||
|
clear();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::clear(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
count = 0;
|
||||||
|
tregion = 0;
|
||||||
|
pmc0 = 0;
|
||||||
|
pmc1 = 0;
|
||||||
|
inst = 0;
|
||||||
|
cyc = 0;
|
||||||
|
ref = 0;
|
||||||
|
tcycles = 0;
|
||||||
|
reads = 0;
|
||||||
|
writes = 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::print(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
std::cout <<"Reg "<<std::string(name)<<":\n";
|
||||||
|
std::cout <<" region "<<tregion<<std::endl;
|
||||||
|
std::cout <<" cycles "<<tcycles<<std::endl;
|
||||||
|
std::cout <<" inst "<<inst <<std::endl;
|
||||||
|
std::cout <<" cyc "<<cyc <<std::endl;
|
||||||
|
std::cout <<" ref "<<ref <<std::endl;
|
||||||
|
std::cout <<" pmc0 "<<pmc0 <<std::endl;
|
||||||
|
std::cout <<" pmc1 "<<pmc1 <<std::endl;
|
||||||
|
std::cout <<" count "<<count <<std::endl;
|
||||||
|
std::cout <<" reads "<<reads <<std::endl;
|
||||||
|
std::cout <<" writes "<<writes <<std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::start(void)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
pmu_start();
|
||||||
|
++count;
|
||||||
|
xmemctrs(&mrstart, &mwstart);
|
||||||
|
tstart = __rdtsc();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::enter(int t)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
counters[0][t] = __rdpmc(0);
|
||||||
|
counters[1][t] = __rdpmc(1);
|
||||||
|
counters[2][t] = __rdpmc((1<<30)|0);
|
||||||
|
counters[3][t] = __rdpmc((1<<30)|1);
|
||||||
|
counters[4][t] = __rdpmc((1<<30)|2);
|
||||||
|
counters[5][t] = __rdtsc();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::exit(int t)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
counters[0][t] = __rdpmc(0) - counters[0][t];
|
||||||
|
counters[1][t] = __rdpmc(1) - counters[1][t];
|
||||||
|
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
|
||||||
|
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
|
||||||
|
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
|
||||||
|
counters[5][t] = __rdtsc() - counters[5][t];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void PmuStat::accum(int nthreads)
|
||||||
|
{
|
||||||
|
#ifdef __x86_64__
|
||||||
|
tend = __rdtsc();
|
||||||
|
xmemctrs(&mrend, &mwend);
|
||||||
|
pmu_stop();
|
||||||
|
for (int t = 0; t < nthreads; ++t) {
|
||||||
|
pmc0 += counters[0][t];
|
||||||
|
pmc1 += counters[1][t];
|
||||||
|
inst += counters[2][t];
|
||||||
|
cyc += counters[3][t];
|
||||||
|
ref += counters[4][t];
|
||||||
|
tcycles += counters[5][t];
|
||||||
|
}
|
||||||
|
uint64_t region = tend - tstart;
|
||||||
|
tregion += region;
|
||||||
|
uint64_t mreads = mrend - mrstart;
|
||||||
|
reads += mreads;
|
||||||
|
uint64_t mwrites = mwend - mwstart;
|
||||||
|
writes += mwrites;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Placeholders: finalising, starting and stopping the PMU currently do
// nothing.
void PmuStat::pmu_fini(void)
{
}

void PmuStat::pmu_start(void)
{
}

void PmuStat::pmu_stop(void)
{
}
|
||||||
|
// One-time PMU setup.  Only builds that define _KNIGHTS_LANDING_ do any
// work here: they open the uncore counters via KNLsetup().
void PmuStat::pmu_init(void) {
#ifdef _KNIGHTS_LANDING_
  PmuStat::KNLsetup();
#endif
}
|
||||||
|
// Report the current memory read / write totals through *mr and *mw.
// With _KNIGHTS_LANDING_ defined the totals are the sums of the EDC read and
// EDC write counters returned by KNLreadctrs(); otherwise both are zero.
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw) {
#ifdef _KNIGHTS_LANDING_
  ctrs snapshot;
  KNLreadctrs(snapshot);

  uint64_t rd_total = 0;
  uint64_t wr_total = 0;
  for (int edc = 0; edc < NEDC; ++edc) {
    rd_total += snapshot.edcrd[edc];
    wr_total += snapshot.edcwr[edc];
  }

  *mr = rd_total;
  *mw = wr_total;
#else
  *mw = 0;
  *mr = 0;
#endif
}
|
||||||
|
|
||||||
|
#ifdef _KNIGHTS_LANDING_
|
||||||
|
|
||||||
|
struct knl_gbl_ PmuStat::gbl;
|
||||||
|
|
||||||
|
#define PMU_MEM
|
||||||
|
|
||||||
|
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
|
||||||
|
{
|
||||||
|
char fname[1024];
|
||||||
|
snprintf(fname, sizeof(fname), "%s/type", ename);
|
||||||
|
FILE *fp = fopen(fname, "r");
|
||||||
|
if (fp == 0) {
|
||||||
|
::printf("open %s", fname);
|
||||||
|
::exit(0);
|
||||||
|
}
|
||||||
|
int type;
|
||||||
|
int ret = fscanf(fp, "%d", &type);
|
||||||
|
assert(ret == 1);
|
||||||
|
fclose(fp);
|
||||||
|
// std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
|
||||||
|
|
||||||
|
struct perf_event_attr hw = {};
|
||||||
|
hw.size = sizeof(hw);
|
||||||
|
hw.type = type;
|
||||||
|
// see /sys/devices/uncore_*/format/*
|
||||||
|
// All of the events we are interested in are configured the same way, but
|
||||||
|
// that isn't always true. Proper code would parse the format files
|
||||||
|
hw.config = event | (umask << 8);
|
||||||
|
//hw.read_format = PERF_FORMAT_GROUP;
|
||||||
|
// unfortunately the above only works within a single PMU; might
|
||||||
|
// as well just read them one at a time
|
||||||
|
int cpu = 0;
|
||||||
|
fd = perf_event_open(&hw, -1, cpu, -1, 0);
|
||||||
|
if (fd == -1) {
|
||||||
|
::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
|
||||||
|
::exit(0);
|
||||||
|
} else {
|
||||||
|
// std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void PmuStat::KNLsetup(void){
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
char fname[1024];
|
||||||
|
|
||||||
|
// MC RPQ inserts and WPQ inserts (reads & writes)
|
||||||
|
for (int mc = 0; mc < NMC; ++mc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
|
||||||
|
// RPQ Inserts
|
||||||
|
KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
|
||||||
|
// WPQ Inserts
|
||||||
|
KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
|
||||||
|
}
|
||||||
|
// EDC RPQ inserts and WPQ inserts
|
||||||
|
for (int edc=0; edc < NEDC; ++edc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
|
||||||
|
// RPQ inserts
|
||||||
|
KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
|
||||||
|
// WPQ inserts
|
||||||
|
KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
|
||||||
|
}
|
||||||
|
// EDC HitE, HitM, MissE, MissM
|
||||||
|
for (int edc=0; edc < NEDC; ++edc)
|
||||||
|
{
|
||||||
|
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
|
||||||
|
KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
|
||||||
|
KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
|
||||||
|
KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
|
||||||
|
KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t PmuStat::KNLreadctr(int fd)
|
||||||
|
{
|
||||||
|
uint64_t data;
|
||||||
|
size_t s = ::read(fd, &data, sizeof(data));
|
||||||
|
if (s != sizeof(uint64_t)){
|
||||||
|
::printf("read counter %lu", s);
|
||||||
|
::exit(0);
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill c with the current value of every counter whose descriptor is held
// in gbl, reading them one at a time through KNLreadctr().
void PmuStat::KNLreadctrs(ctrs &c)
{
  // Memory-controller read / write counters.
  for (int mc = 0; mc < NMC; ++mc) {
    c.mcrd[mc] = KNLreadctr(gbl.mc_rd[mc]);
    c.mcwr[mc] = KNLreadctr(gbl.mc_wr[mc]);
  }

  // EDC read / write counters.
  for (int edc = 0; edc < NEDC; ++edc) {
    c.edcrd[edc] = KNLreadctr(gbl.edc_rd[edc]);
    c.edcwr[edc] = KNLreadctr(gbl.edc_wr[edc]);
  }

  // EDC hit / miss counters.
  for (int edc = 0; edc < NEDC; ++edc) {
    c.edchite[edc]  = KNLreadctr(gbl.edc_hite[edc]);
    c.edchitm[edc]  = KNLreadctr(gbl.edc_hitm[edc]);
    c.edcmisse[edc] = KNLreadctr(gbl.edc_misse[edc]);
    c.edcmissm[edc] = KNLreadctr(gbl.edc_missm[edc]);
  }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
104
lib/Stat.h
Normal file
104
lib/Stat.h
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
#ifndef _GRID_STAT_H
|
||||||
|
#define _GRID_STAT_H
|
||||||
|
|
||||||
|
#ifdef AVX512
|
||||||
|
#define _KNIGHTS_LANDING_ROOTONLY
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Extra KNL counters from MCDRAM
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
#ifdef _KNIGHTS_LANDING_
|
||||||
|
#define NMC 6
|
||||||
|
#define NEDC 8
|
||||||
|
// One snapshot of the uncore counter values, with one array element per
// memory controller (NMC) or per EDC (NEDC).
struct ctrs
{
  uint64_t mcrd[NMC];       // memory-controller read counters
  uint64_t mcwr[NMC];       // memory-controller write counters
  uint64_t edcrd[NEDC];     // EDC read counters
  uint64_t edcwr[NEDC];     // EDC write counters
  uint64_t edchite[NEDC];   // EDC HitE counters
  uint64_t edchitm[NEDC];   // EDC HitM counters
  uint64_t edcmisse[NEDC];  // EDC MissE counters
  uint64_t edcmissm[NEDC];  // EDC MissM counters
};
|
||||||
|
// Peter/Azusa:
// Our modification of code provided by Larry Meadows from Intel.
// Verified by (non-NDA) email exchange as OK to publish on github; it only
// uses the /sys/devices/ filesystem, which is already public and part of
// the Linux kernel for KNL.
//
// Integer handles, one per counter, laid out in parallel with struct ctrs:
// element i of each array here corresponds to element i of the matching
// array there.
struct knl_gbl_
{
  int mc_rd[NMC];        // memory-controller read counters
  int mc_wr[NMC];        // memory-controller write counters
  int edc_rd[NEDC];      // EDC read counters
  int edc_wr[NEDC];      // EDC write counters
  int edc_hite[NEDC];    // EDC HitE counters
  int edc_hitm[NEDC];    // EDC HitM counters
  int edc_misse[NEDC];   // EDC MissE counters
  int edc_missm[NEDC];   // EDC MissM counters
};
|
||||||
|
#endif
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Accumulates hardware-counter statistics for a named code region.
// Usage as implemented in Stat.cc: start() opens a region, each thread
// brackets its work with enter(t) / exit(t), and accum(nthreads) closes the
// region and adds the per-thread deltas to the cumulative totals below.
class PmuStat
{
  // Per-thread scratch, indexed [row][t]; see the row map further down.
  uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
  static struct knl_gbl_ gbl;
#endif
  const char *name;

  uint64_t reads;     // memory reads
  uint64_t writes;    // memory writes
  uint64_t mrstart;   // memory read counter at start of parallel region
  uint64_t mrend;     // memory read counter at end of parallel region
  uint64_t mwstart;   // memory write counter at start of parallel region
  uint64_t mwend;     // memory write counter at end of parallel region

  // cumulative counters
  uint64_t count;     // number of invocations
  uint64_t tregion;   // total time in parallel region (from thread 0)
  uint64_t tcycles;   // total cycles inside parallel region
  uint64_t inst, ref, cyc;   // fixed counters
  uint64_t pmc0, pmc1;// pmu
  // add memory counters here
  // temp variables
  uint64_t tstart;    // tsc at start of parallel region
  uint64_t tend;      // tsc at end of parallel region
  // map for the rows of counters[][t]; enter() stores the starting value
  // and exit() replaces it with the delta that accum() adds to the total:
  // 0 pmc0
  // 1 pmc1
  // 2 inst
  // 3 cyc
  // 4 ref
  // 5 tsc (accumulated into tcycles)
  static bool pmu_initialized;
public:
  static bool is_init(void){ return pmu_initialized;}
  static void pmu_init(void);
  static void pmu_fini(void);
  static void pmu_start(void);
  static void pmu_stop(void);
  // Close the region opened by start() and accumulate slots 0..nthreads-1.
  void accum(int nthreads);
  // Current memory read / write totals (both zero without _KNIGHTS_LANDING_).
  static void xmemctrs(uint64_t *mr, uint64_t *mw);
  void start(void);
  // Per-thread bracket around the measured work; t indexes counters[][t].
  void enter(int t);
  void exit(int t);
  void print(void);
  void init(const char *regname);
  void clear(void);
#ifdef _KNIGHTS_LANDING_
  static void KNLsetup(void);
  static uint64_t KNLreadctr(int fd);
  static void KNLreadctrs(ctrs &c);
  static void KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif

};
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
1679
lib/Stencil.h
1679
lib/Stencil.h
File diff suppressed because it is too large
Load Diff
@ -30,22 +30,22 @@ Author: neo <cossu@post.kek.jp>
|
|||||||
#ifndef GRID_MATH_H
|
#ifndef GRID_MATH_H
|
||||||
#define GRID_MATH_H
|
#define GRID_MATH_H
|
||||||
|
|
||||||
#include <tensors/Tensor_traits.h>
|
#include <Grid/tensors/Tensor_traits.h>
|
||||||
#include <tensors/Tensor_class.h>
|
#include <Grid/tensors/Tensor_class.h>
|
||||||
#include <tensors/Tensor_arith.h>
|
#include <Grid/tensors/Tensor_arith.h>
|
||||||
#include <tensors/Tensor_inner.h>
|
#include <Grid/tensors/Tensor_inner.h>
|
||||||
#include <tensors/Tensor_outer.h>
|
#include <Grid/tensors/Tensor_outer.h>
|
||||||
#include <tensors/Tensor_transpose.h>
|
#include <Grid/tensors/Tensor_transpose.h>
|
||||||
#include <tensors/Tensor_trace.h>
|
#include <Grid/tensors/Tensor_trace.h>
|
||||||
#include <tensors/Tensor_index.h>
|
#include <Grid/tensors/Tensor_index.h>
|
||||||
#include <tensors/Tensor_Ta.h>
|
#include <Grid/tensors/Tensor_Ta.h>
|
||||||
#include <tensors/Tensor_determinant.h>
|
#include <Grid/tensors/Tensor_determinant.h>
|
||||||
#include <tensors/Tensor_exp.h>
|
#include <Grid/tensors/Tensor_exp.h>
|
||||||
//#include <tensors/Tensor_peek.h>
|
//#include <Grid/tensors/Tensor_peek.h>
|
||||||
//#include <tensors/Tensor_poke.h>
|
//#include <Grid/tensors/Tensor_poke.h>
|
||||||
#include <tensors/Tensor_reality.h>
|
#include <Grid/tensors/Tensor_reality.h>
|
||||||
#include <tensors/Tensor_unary.h>
|
#include <Grid/tensors/Tensor_unary.h>
|
||||||
#include <tensors/Tensor_extract_merge.h>
|
#include <Grid/tensors/Tensor_extract_merge.h>
|
||||||
#include <tensors/Tensor_logical.h>
|
#include <Grid/tensors/Tensor_logical.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -37,11 +37,20 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for ")
|
#ifdef GRID_NUMA
|
||||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
|
||||||
#else
|
#else
|
||||||
#define PARALLEL_FOR_LOOP
|
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
|
||||||
|
#endif
|
||||||
|
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
|
||||||
|
#define PARALLEL_REGION _Pragma("omp parallel")
|
||||||
|
#else
|
||||||
|
#define PARALLEL_FOR_LOOP
|
||||||
|
#define PARALLEL_FOR_LOOP_INTERN
|
||||||
#define PARALLEL_NESTED_LOOP2
|
#define PARALLEL_NESTED_LOOP2
|
||||||
|
#define PARALLEL_REGION
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
@ -123,6 +132,22 @@ class GridThread {
|
|||||||
ThreadBarrier();
|
ThreadBarrier();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Byte copy with BSD bcopy argument order (source first, then destination).
// With GRID_OMP the range is split across the threads of a parallel region
// using GridThread::GetWorkBarrier (presumably giving each thread its own
// offset/length slice -- confirm against its definition); otherwise the
// whole range is copied in one call.
//
// The calls below are written as ::bcopy so that they reach the C library
// routine (declared in <strings.h>).  An unqualified bcopy inside this
// member would name the member itself and recurse without end.
static void bcopy(const void *src, void *dst, size_t len) {
#ifdef GRID_OMP
#pragma omp parallel
  {
    const char *c_src = (const char *) src;
    char *c_dest = (char *) dst;
    int me, mywork, myoff;
    GridThread::GetWorkBarrier(len, me, mywork, myoff);
    ::bcopy(&c_src[myoff], &c_dest[myoff], mywork);
  }
#else
  ::bcopy(src, dst, len);
#endif
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -31,7 +31,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||||
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
#define GRID_ALGORITHM_COARSENED_MATRIX_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -283,7 +282,7 @@ PARALLEL_FOR_LOOP
|
|||||||
} else if(SE->_is_local) {
|
} else if(SE->_is_local) {
|
||||||
nbr = in._odata[SE->_offset];
|
nbr = in._odata[SE->_offset];
|
||||||
} else {
|
} else {
|
||||||
nbr = Stencil.comm_buf[SE->_offset];
|
nbr = Stencil.CommBuf()[SE->_offset];
|
||||||
}
|
}
|
||||||
res = res + A[point]._odata[ss]*nbr;
|
res = res + A[point]._odata[ss]*nbr;
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||||
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
#define GRID_ALGORITHM_SPARSE_MATRIX_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,8 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_CHEBYSHEV_H
|
#ifndef GRID_CHEBYSHEV_H
|
||||||
#define GRID_CHEBYSHEV_H
|
#define GRID_CHEBYSHEV_H
|
||||||
|
|
||||||
#include<Grid.h>
|
#include <Grid/algorithms/LinearOperator.h>
|
||||||
#include<algorithms/LinearOperator.h>
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -18,10 +18,10 @@
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <Config.h>
|
#include <Config.h>
|
||||||
|
|
||||||
#ifdef HAVE_GMP_H
|
#ifdef HAVE_LIBGMP
|
||||||
#include <algorithms/approx/bigfloat.h>
|
#include "bigfloat.h"
|
||||||
#else
|
#else
|
||||||
#include <algorithms/approx/bigfloat_double.h>
|
#include "bigfloat_double.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||||
|
@ -1,150 +1,168 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_CONJUGATE_GRADIENT_H
|
#ifndef GRID_CONJUGATE_GRADIENT_H
|
||||||
#define GRID_CONJUGATE_GRADIENT_H
|
#define GRID_CONJUGATE_GRADIENT_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Base classes for iterative processes based on operators
|
// Base classes for iterative processes based on operators
|
||||||
// single input vec, single output vec.
|
// single input vec, single output vec.
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class Field>
|
template <class Field>
|
||||||
class ConjugateGradient : public OperatorFunction<Field> {
|
class ConjugateGradient : public OperatorFunction<Field> {
|
||||||
public:
|
public:
|
||||||
RealD Tolerance;
|
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
|
||||||
Integer MaxIterations;
|
// Defaults true.
|
||||||
ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
|
RealD Tolerance;
|
||||||
};
|
Integer MaxIterations;
|
||||||
|
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
|
||||||
|
: Tolerance(tol),
|
||||||
|
MaxIterations(maxit),
|
||||||
|
ErrorOnNoConverge(err_on_no_conv){};
|
||||||
|
|
||||||
|
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
|
||||||
|
Field &psi) {
|
||||||
|
psi.checkerboard = src.checkerboard;
|
||||||
|
conformable(psi, src);
|
||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
|
RealD cp, c, a, d, b, ssq, qq, b_pred;
|
||||||
|
|
||||||
psi.checkerboard = src.checkerboard;
|
Field p(src);
|
||||||
conformable(psi,src);
|
Field mmp(src);
|
||||||
|
Field r(src);
|
||||||
|
|
||||||
RealD cp,c,a,d,b,ssq,qq,b_pred;
|
// Initial residual computation & set up
|
||||||
|
RealD guess = norm2(psi);
|
||||||
Field p(src);
|
assert(std::isnan(guess) == 0);
|
||||||
Field mmp(src);
|
|
||||||
Field r(src);
|
|
||||||
|
|
||||||
//Initial residual computation & set up
|
|
||||||
RealD guess = norm2(psi);
|
|
||||||
assert(std::isnan(guess)==0);
|
|
||||||
|
|
||||||
Linop.HermOpAndNorm(psi,mmp,d,b);
|
|
||||||
|
Linop.HermOpAndNorm(psi, mmp, d, b);
|
||||||
r= src-mmp;
|
|
||||||
p= r;
|
|
||||||
|
|
||||||
a =norm2(p);
|
|
||||||
cp =a;
|
|
||||||
ssq=norm2(src);
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
|
r = src - mmp;
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
|
p = r;
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
|
|
||||||
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
|
|
||||||
|
|
||||||
RealD rsq = Tolerance* Tolerance*ssq;
|
a = norm2(p);
|
||||||
|
cp = a;
|
||||||
//Check if guess is really REALLY good :)
|
ssq = norm2(src);
|
||||||
if ( cp <= rsq ) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
|
|
||||||
|
|
||||||
GridStopWatch LinalgTimer;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
GridStopWatch MatrixTimer;
|
<< "ConjugateGradient: guess " << guess << std::endl;
|
||||||
GridStopWatch SolverTimer;
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: src " << ssq << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mp " << d << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: mmp " << b << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: cp,r " << cp << std::endl;
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: p " << a << std::endl;
|
||||||
|
|
||||||
SolverTimer.Start();
|
RealD rsq = Tolerance * Tolerance * ssq;
|
||||||
int k;
|
|
||||||
for (k=1;k<=MaxIterations;k++){
|
|
||||||
|
|
||||||
c=cp;
|
|
||||||
|
|
||||||
MatrixTimer.Start();
|
// Check if guess is really REALLY good :)
|
||||||
Linop.HermOpAndNorm(p,mmp,d,qq);
|
if (cp <= rsq) {
|
||||||
MatrixTimer.Stop();
|
return;
|
||||||
|
|
||||||
LinalgTimer.Start();
|
|
||||||
// RealD qqck = norm2(mmp);
|
|
||||||
// ComplexD dck = innerProduct(p,mmp);
|
|
||||||
|
|
||||||
a = c/d;
|
|
||||||
b_pred = a*(a*qq-d)/c;
|
|
||||||
|
|
||||||
cp = axpy_norm(r,-a,mmp,r);
|
|
||||||
b = cp/c;
|
|
||||||
|
|
||||||
// Fuse these loops ; should be really easy
|
|
||||||
psi= a*p+psi;
|
|
||||||
p = p*b+r;
|
|
||||||
|
|
||||||
LinalgTimer.Stop();
|
|
||||||
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
|
|
||||||
|
|
||||||
// Stopping condition
|
|
||||||
if ( cp <= rsq ) {
|
|
||||||
|
|
||||||
SolverTimer.Stop();
|
|
||||||
Linop.HermOpAndNorm(psi,mmp,d,qq);
|
|
||||||
p=mmp-src;
|
|
||||||
|
|
||||||
RealD mmpnorm = sqrt(norm2(mmp));
|
|
||||||
RealD psinorm = sqrt(norm2(psi));
|
|
||||||
RealD srcnorm = sqrt(norm2(src));
|
|
||||||
RealD resnorm = sqrt(norm2(p));
|
|
||||||
RealD true_residual = resnorm/srcnorm;
|
|
||||||
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
|
|
||||||
<<" computed residual "<<sqrt(cp/ssq)
|
|
||||||
<<" true residual " <<true_residual
|
|
||||||
<<" target "<<Tolerance<<std::endl;
|
|
||||||
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
|
|
||||||
std::cout<<std::endl;
|
|
||||||
|
|
||||||
assert(true_residual/Tolerance < 1000.0);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
std::cout << GridLogIterative << std::setprecision(4)
|
||||||
|
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
GridStopWatch LinalgTimer;
|
||||||
|
GridStopWatch MatrixTimer;
|
||||||
|
GridStopWatch SolverTimer;
|
||||||
|
|
||||||
|
SolverTimer.Start();
|
||||||
|
int k;
|
||||||
|
for (k = 1; k <= MaxIterations; k++) {
|
||||||
|
c = cp;
|
||||||
|
|
||||||
|
MatrixTimer.Start();
|
||||||
|
Linop.HermOpAndNorm(p, mmp, d, qq);
|
||||||
|
MatrixTimer.Stop();
|
||||||
|
|
||||||
|
LinalgTimer.Start();
|
||||||
|
// RealD qqck = norm2(mmp);
|
||||||
|
// ComplexD dck = innerProduct(p,mmp);
|
||||||
|
|
||||||
|
a = c / d;
|
||||||
|
b_pred = a * (a * qq - d) / c;
|
||||||
|
|
||||||
|
cp = axpy_norm(r, -a, mmp, r);
|
||||||
|
b = cp / c;
|
||||||
|
|
||||||
|
// Fuse these loops ; should be really easy
|
||||||
|
psi = a * p + psi;
|
||||||
|
p = p * b + r;
|
||||||
|
|
||||||
|
LinalgTimer.Stop();
|
||||||
|
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
|
||||||
|
<< " residual " << cp << " target " << rsq << std::endl;
|
||||||
|
|
||||||
|
// Stopping condition
|
||||||
|
if (cp <= rsq) {
|
||||||
|
SolverTimer.Stop();
|
||||||
|
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
|
p = mmp - src;
|
||||||
|
|
||||||
|
RealD mmpnorm = sqrt(norm2(mmp));
|
||||||
|
RealD psinorm = sqrt(norm2(psi));
|
||||||
|
RealD srcnorm = sqrt(norm2(src));
|
||||||
|
RealD resnorm = sqrt(norm2(p));
|
||||||
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
|
||||||
|
<< " true residual " << true_residual << " target "
|
||||||
|
<< Tolerance << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Time elapsed: Iterations "
|
||||||
|
<< SolverTimer.Elapsed() << " Matrix "
|
||||||
|
<< MatrixTimer.Elapsed() << " Linalg "
|
||||||
|
<< LinalgTimer.Elapsed();
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
|
||||||
|
<< std::endl;
|
||||||
|
if (ErrorOnNoConverge) assert(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
142
lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
142
lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||||
|
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
//Mixed precision restarted defect correction CG
|
||||||
|
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
|
||||||
|
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
|
||||||
|
public:
|
||||||
|
RealD Tolerance;
|
||||||
|
Integer MaxInnerIterations;
|
||||||
|
Integer MaxOuterIterations;
|
||||||
|
GridBase* SinglePrecGrid; //Grid for single-precision fields
|
||||||
|
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||||
|
LinearOperatorBase<FieldF> &Linop_f;
|
||||||
|
LinearOperatorBase<FieldD> &Linop_d;
|
||||||
|
|
||||||
|
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
|
||||||
|
LinearFunction<FieldF> *guesser;
|
||||||
|
|
||||||
|
MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
|
||||||
|
Linop_f(_Linop_f), Linop_d(_Linop_d),
|
||||||
|
Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
|
||||||
|
OuterLoopNormMult(100.), guesser(NULL){ };
|
||||||
|
|
||||||
|
void useGuesser(LinearFunction<FieldF> &g){
|
||||||
|
guesser = &g;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const FieldD &src_d_in, FieldD &sol_d){
|
||||||
|
GridStopWatch TotalTimer;
|
||||||
|
TotalTimer.Start();
|
||||||
|
|
||||||
|
int cb = src_d_in.checkerboard;
|
||||||
|
sol_d.checkerboard = cb;
|
||||||
|
|
||||||
|
RealD src_norm = norm2(src_d_in);
|
||||||
|
RealD stop = src_norm * Tolerance*Tolerance;
|
||||||
|
|
||||||
|
GridBase* DoublePrecGrid = src_d_in._grid;
|
||||||
|
FieldD tmp_d(DoublePrecGrid);
|
||||||
|
tmp_d.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldD tmp2_d(DoublePrecGrid);
|
||||||
|
tmp2_d.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldD src_d(DoublePrecGrid);
|
||||||
|
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
|
||||||
|
|
||||||
|
RealD inner_tol = Tolerance;
|
||||||
|
|
||||||
|
FieldF src_f(SinglePrecGrid);
|
||||||
|
src_f.checkerboard = cb;
|
||||||
|
|
||||||
|
FieldF sol_f(SinglePrecGrid);
|
||||||
|
sol_f.checkerboard = cb;
|
||||||
|
|
||||||
|
ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
|
||||||
|
CG_f.ErrorOnNoConverge = false;
|
||||||
|
|
||||||
|
GridStopWatch InnerCGtimer;
|
||||||
|
|
||||||
|
GridStopWatch PrecChangeTimer;
|
||||||
|
|
||||||
|
for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
|
||||||
|
//Compute double precision rsd and also new RHS vector.
|
||||||
|
Linop_d.HermOp(sol_d, tmp_d);
|
||||||
|
RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
|
||||||
|
|
||||||
|
if(norm < OuterLoopNormMult * stop){
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
|
||||||
|
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(src_f, src_d);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
zeroit(sol_f);
|
||||||
|
|
||||||
|
//Optionally improve inner solver guess (eg using known eigenvectors)
|
||||||
|
if(guesser != NULL)
|
||||||
|
(*guesser)(src_f, sol_f);
|
||||||
|
|
||||||
|
//Inner CG
|
||||||
|
CG_f.Tolerance = inner_tol;
|
||||||
|
InnerCGtimer.Start();
|
||||||
|
CG_f(Linop_f, src_f, sol_f);
|
||||||
|
InnerCGtimer.Stop();
|
||||||
|
|
||||||
|
//Convert sol back to double and add to double prec solution
|
||||||
|
PrecChangeTimer.Start();
|
||||||
|
precisionChange(tmp_d, sol_f);
|
||||||
|
PrecChangeTimer.Stop();
|
||||||
|
|
||||||
|
axpy(sol_d, 1.0, tmp_d, sol_d);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Final trial CG
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
|
||||||
|
|
||||||
|
ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
|
||||||
|
CG_d(Linop_d, src_d_in, sol_d);
|
||||||
|
|
||||||
|
TotalTimer.Stop();
|
||||||
|
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -130,8 +130,8 @@ DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#include <algorithms/iterative/Householder.h>
|
#include "Householder.h"
|
||||||
#include <algorithms/iterative/Francis.h>
|
#include "Francis.h"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -31,10 +31,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
#include <string.h> //memset
|
#include <string.h> //memset
|
||||||
#ifdef USE_LAPACK
|
#ifdef USE_LAPACK
|
||||||
#include <lapacke.h>
|
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
|
||||||
|
double *vl, double *vu, int *il, int *iu, double *abstol,
|
||||||
|
int *m, double *w, double *z, int *ldz, int *isuppz,
|
||||||
|
double *work, int *lwork, int *iwork, int *liwork,
|
||||||
|
int *info);
|
||||||
#endif
|
#endif
|
||||||
#include <algorithms/iterative/DenseMatrix.h>
|
#include "DenseMatrix.h"
|
||||||
#include <algorithms/iterative/EigenSort.h>
|
#include "EigenSort.h"
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_CARTESIAN_BASE_H
|
#ifndef GRID_CARTESIAN_BASE_H
|
||||||
#define GRID_CARTESIAN_BASE_H
|
#define GRID_CARTESIAN_BASE_H
|
||||||
|
|
||||||
#include <Grid.h>
|
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid{
|
||||||
|
|
||||||
@ -78,15 +77,12 @@ public:
|
|||||||
// GridCartesian / GridRedBlackCartesian
|
// GridCartesian / GridRedBlackCartesian
|
||||||
////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////
|
||||||
virtual int CheckerBoarded(int dim)=0;
|
virtual int CheckerBoarded(int dim)=0;
|
||||||
virtual int CheckerBoard(std::vector<int> site)=0;
|
virtual int CheckerBoard(std::vector<int> &site)=0;
|
||||||
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
|
||||||
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
|
||||||
int CheckerBoardFromOindex (int Oindex){
|
virtual int CheckerBoardFromOindex (int Oindex)=0;
|
||||||
std::vector<int> ocoor;
|
virtual int CheckerBoardFromOindexTable (int Oindex)=0;
|
||||||
oCoorFromOindex(ocoor,Oindex);
|
|
||||||
return CheckerBoard(ocoor);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Local layout calculations
|
// Local layout calculations
|
||||||
@ -107,6 +103,12 @@ public:
|
|||||||
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
virtual int iIndex(std::vector<int> &lcoor)
|
||||||
|
{
|
||||||
|
int idx=0;
|
||||||
|
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
inline int oIndexReduced(std::vector<int> &ocoor)
|
inline int oIndexReduced(std::vector<int> &ocoor)
|
||||||
{
|
{
|
||||||
int idx=0;
|
int idx=0;
|
||||||
@ -123,12 +125,6 @@ public:
|
|||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
// SIMD lane addressing
|
// SIMD lane addressing
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
inline int iIndex(std::vector<int> &lcoor)
|
|
||||||
{
|
|
||||||
int idx=0;
|
|
||||||
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
inline void iCoorFromIindex(std::vector<int> &coor,int lane)
|
||||||
{
|
{
|
||||||
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
|
||||||
@ -220,7 +216,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
|
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
|
||||||
o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim
|
o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim
|
||||||
}
|
}
|
||||||
|
|
||||||
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
|
||||||
|
@ -39,10 +39,17 @@ class GridCartesian: public GridBase {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
// GridCartesian reports checkerboard 0 for every outer index; Oindex is ignored.
virtual int CheckerBoardFromOindexTable (int Oindex)
{
  return 0;
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
virtual int CheckerBoard(std::vector<int> site){
|
virtual int CheckerBoard(std::vector<int> &site){
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
virtual int CheckerBoardDestination(int cb,int shift,int dim){
|
||||||
|
@ -32,29 +32,24 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
static const int CbRed =0;
|
static const int CbRed =0;
|
||||||
static const int CbBlack=1;
|
static const int CbBlack=1;
|
||||||
static const int Even =CbRed;
|
static const int Even =CbRed;
|
||||||
static const int Odd =CbBlack;
|
static const int Odd =CbBlack;
|
||||||
|
|
||||||
// Perhaps these are misplaced and
|
|
||||||
// should be in sparse matrix.
|
|
||||||
// Also should make these a named enum type
|
|
||||||
static const int DaggerNo=0;
|
|
||||||
static const int DaggerYes=1;
|
|
||||||
|
|
||||||
// Specialise this for red black grids storing half the data like a chess board.
|
// Specialise this for red black grids storing half the data like a chess board.
|
||||||
class GridRedBlackCartesian : public GridBase
|
class GridRedBlackCartesian : public GridBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
std::vector<int> _checker_dim_mask;
|
std::vector<int> _checker_dim_mask;
|
||||||
int _checker_dim;
|
int _checker_dim;
|
||||||
|
std::vector<int> _checker_board;
|
||||||
|
|
||||||
virtual int CheckerBoarded(int dim){
|
virtual int CheckerBoarded(int dim){
|
||||||
if( dim==_checker_dim) return 1;
|
if( dim==_checker_dim) return 1;
|
||||||
else return 0;
|
else return 0;
|
||||||
}
|
}
|
||||||
virtual int CheckerBoard(std::vector<int> site){
|
virtual int CheckerBoard(std::vector<int> &site){
|
||||||
int linear=0;
|
int linear=0;
|
||||||
assert(site.size()==_ndimension);
|
assert(site.size()==_ndimension);
|
||||||
for(int d=0;d<_ndimension;d++){
|
for(int d=0;d<_ndimension;d++){
|
||||||
@ -78,12 +73,20 @@ public:
|
|||||||
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
// or by looping over x,y,z and multiply rather than computing checkerboard.
|
||||||
|
|
||||||
if ( (source_cb+ocb)&1 ) {
|
if ( (source_cb+ocb)&1 ) {
|
||||||
|
|
||||||
return (shift)/2;
|
return (shift)/2;
|
||||||
} else {
|
} else {
|
||||||
return (shift+1)/2;
|
return (shift+1)/2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
virtual int CheckerBoardFromOindexTable (int Oindex) {
|
||||||
|
return _checker_board[Oindex];
|
||||||
|
}
|
||||||
|
virtual int CheckerBoardFromOindex (int Oindex)
|
||||||
|
{
|
||||||
|
std::vector<int> ocoor;
|
||||||
|
oCoorFromOindex(ocoor,Oindex);
|
||||||
|
return CheckerBoard(ocoor);
|
||||||
|
}
|
||||||
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
|
||||||
|
|
||||||
if(dim != _checker_dim) return shift;
|
if(dim != _checker_dim) return shift;
|
||||||
@ -175,7 +178,7 @@ public:
|
|||||||
// all elements of a simd vector must have same checkerboard.
|
// all elements of a simd vector must have same checkerboard.
|
||||||
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
// If Ls vectorised, this must still be the case; e.g. dwf rb5d
|
||||||
if ( _simd_layout[d]>1 ) {
|
if ( _simd_layout[d]>1 ) {
|
||||||
if ( d != _checker_dim ) {
|
if ( checker_dim_mask[d] ) {
|
||||||
assert( (_rdimensions[d]&0x1) == 0 );
|
assert( (_rdimensions[d]&0x1) == 0 );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -191,6 +194,8 @@ public:
|
|||||||
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
_ostride[d] = _ostride[d-1]*_rdimensions[d-1];
|
||||||
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
_istride[d] = _istride[d-1]*_simd_layout[d-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -211,6 +216,18 @@ public:
|
|||||||
_slice_nblock[d]=nblock;
|
_slice_nblock[d]=nblock;
|
||||||
block = block*_rdimensions[d];
|
block = block*_rdimensions[d];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
// Create a checkerboard lookup table
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
int rvol = 1;
|
||||||
|
for(int d=0;d<_ndimension;d++){
|
||||||
|
rvol=rvol * _rdimensions[d];
|
||||||
|
}
|
||||||
|
_checker_board.resize(rvol);
|
||||||
|
for(int osite=0;osite<_osites;osite++){
|
||||||
|
_checker_board[osite] = CheckerBoardFromOindex (osite);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
protected:
|
protected:
|
||||||
@ -224,9 +241,21 @@ protected:
|
|||||||
idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return idx;
|
return idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
virtual int iIndex(std::vector<int> &lcoor)
|
||||||
|
{
|
||||||
|
int idx=0;
|
||||||
|
for(int d=0;d<_ndimension;d++) {
|
||||||
|
if( d==_checker_dim ) {
|
||||||
|
idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d]));
|
||||||
|
} else {
|
||||||
|
idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
124
lib/communicator/Communicator_base.cc
Normal file
124
lib/communicator/Communicator_base.cc
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_base.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Base address of the comms buffer; assigned in ShmInitGeneric() on the
// non-MPI3 builds and returned by ShmBufferSelf().
void *CartesianCommunicator::ShmCommBuf;

// Capacity of the comms buffer in bytes (default 128 MB); the limit that
// ShmBufferMalloc() enforces.
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128 * 1024 * 1024;
|
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// Alloc, free shmem region
|
||||||
|
/////////////////////////////////
|
||||||
|
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
|
||||||
|
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||||
|
void *ptr = (void *)heap_top;
|
||||||
|
heap_top += bytes;
|
||||||
|
heap_bytes+= bytes;
|
||||||
|
if (heap_bytes >= MAX_MPI_SHM_BYTES) {
|
||||||
|
std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm <MB> flag" <<std::endl;
|
||||||
|
std::cout<< " Parameter specified in units of MB (megabytes) " <<std::endl;
|
||||||
|
std::cout<< " Current value is " << (MAX_MPI_SHM_BYTES/(1024*1024)) <<std::endl;
|
||||||
|
assert(heap_bytes<MAX_MPI_SHM_BYTES);
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
// Discard every ShmBufferMalloc() allocation at once: rewind heap_top to the
// address returned by ShmBufferSelf() and zero the allocated-byte count.
void CartesianCommunicator::ShmBufferFreeAll(void)
{
  void *base = ShmBufferSelf();
  heap_bytes = 0;
  heap_top   = (size_t)base;
}
|
||||||
|
|
||||||
|
/////////////////////////////////
|
||||||
|
// Grid information queries
|
||||||
|
/////////////////////////////////
|
||||||
|
// Returns 1 on the rank whose linear processor index is 0, otherwise 0.
int CartesianCommunicator::IsBoss(void)
{
  return (_processor == 0) ? 1 : 0;
}
|
||||||
|
// The boss is always linear rank 0.
int CartesianCommunicator::BossRank(void)
{
  return 0;
}
|
||||||
|
// Linear rank of the calling process (_processor).
int CartesianCommunicator::ThisRank(void)
{
  return _processor;
}
|
||||||
|
// Coordinate of the calling process, returned by reference to _processor_coor.
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void)
{
  return _processor_coor;
}
|
||||||
|
// Per-dimension processor counts, returned by reference to _processors.
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)
{
  return _processors;
}
|
||||||
|
// Total number of processes (_Nprocessors).
int CartesianCommunicator::ProcessorCount(void)
{
  return _Nprocessors;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Reduce one single-precision complex value by handing it to the float
// GlobalSumVector as two consecutive floats.
void CartesianCommunicator::GlobalSum(ComplexF &c)
{
  float *parts = reinterpret_cast<float *>(&c);
  GlobalSumVector(parts, 2);
}
|
||||||
|
// Reduce N single-precision complex values by treating the array as 2*N floats.
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
{
  float *parts = reinterpret_cast<float *>(c);
  GlobalSumVector(parts, 2 * N);
}
|
||||||
|
// Reduce one double-precision complex value by handing it to the double
// GlobalSumVector as two consecutive doubles.
void CartesianCommunicator::GlobalSum(ComplexD &c)
{
  double *parts = reinterpret_cast<double *>(&c);
  GlobalSumVector(parts, 2);
}
|
||||||
|
// Reduce N double-precision complex values by treating the array as 2*N doubles.
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
{
  double *parts = reinterpret_cast<double *>(c);
  GlobalSumVector(parts, 2 * N);
}
|
||||||
|
|
||||||
|
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
|
||||||
|
{
|
||||||
|
SendToRecvFromComplete(waitall);
|
||||||
|
}
|
||||||
|
// Intentionally a no-op in the builds that compile this definition
// (neither GRID_COMMS_MPI3 nor GRID_COMMS_MPI3L defined).
void CartesianCommunicator::StencilBarrier(void)
{
}
|
||||||
|
|
||||||
|
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
|
||||||
|
|
||||||
|
// Base address of this process's comms buffer (ShmCommBuf).
void *CartesianCommunicator::ShmBufferSelf(void)
{
  return ShmCommBuf;
}
|
||||||
|
|
||||||
|
// In this build no other rank's buffer is addressable: always NULL, rank is ignored.
void *CartesianCommunicator::ShmBuffer(int rank)
{
  return NULL;
}
|
||||||
|
// In this build no pointer translation is available: always NULL, both
// arguments are ignored.
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
{
  return NULL;
}
|
||||||
|
// Back the comms buffer with ShmBufStorageVector: size it to
// MAX_MPI_SHM_BYTES and publish the address of its first element as ShmCommBuf.
void CartesianCommunicator::ShmInitGeneric(void)
{
  ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
  void *base = (void *)&ShmBufStorageVector[0];
  ShmCommBuf = base;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -34,123 +35,196 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifdef GRID_COMMS_MPI
|
#ifdef GRID_COMMS_MPI
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
#include <mpi.h>
|
||||||
|
#endif
|
||||||
|
#ifdef GRID_COMMS_MPI3L
|
||||||
|
#include <mpi.h>
|
||||||
|
#endif
|
||||||
#ifdef GRID_COMMS_SHMEM
|
#ifdef GRID_COMMS_SHMEM
|
||||||
#include <mpp/shmem.h>
|
#include <mpp/shmem.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
class CartesianCommunicator {
|
class CartesianCommunicator {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
// 65536 ranks per node adequate for now
|
||||||
|
// 128MB shared memory for comms is enough for 48^4 local vol comms
|
||||||
|
// Give external control (command line override?) of this
|
||||||
|
|
||||||
|
static const int MAXLOG2RANKSPERNODE = 16;
|
||||||
|
static uint64_t MAX_MPI_SHM_BYTES;
|
||||||
|
|
||||||
// Communicator should know nothing of the physics grid, only processor grid.
|
// Communicator should know nothing of the physics grid, only processor grid.
|
||||||
|
int _Nprocessors; // How many in all
|
||||||
|
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||||
|
int _processor; // linear processor rank
|
||||||
|
std::vector<int> _processor_coor; // linear processor coordinate
|
||||||
|
unsigned long _ndimension;
|
||||||
|
|
||||||
int _Nprocessors; // How many in all
|
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L)
|
||||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
static MPI_Comm communicator_world;
|
||||||
int _processor; // linear processor rank
|
MPI_Comm communicator;
|
||||||
std::vector<int> _processor_coor; // linear processor coordinate
|
typedef MPI_Request CommsRequest_t;
|
||||||
unsigned long _ndimension;
|
|
||||||
|
|
||||||
#ifdef GRID_COMMS_MPI
|
|
||||||
MPI_Comm communicator;
|
|
||||||
typedef MPI_Request CommsRequest_t;
|
|
||||||
#else
|
#else
|
||||||
typedef int CommsRequest_t;
|
typedef int CommsRequest_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void Init(int *argc, char ***argv);
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// Helper functionality for SHM Windows common to all other impls
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// Longer term; drop this in favour of a master / slave model with
|
||||||
|
// cartesian communicator on a subset of ranks, slave ranks controlled
|
||||||
|
// by group leader with data xfer via shared memory
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
#ifdef GRID_COMMS_MPI3
|
||||||
|
|
||||||
// Constructor
|
static int ShmRank;
|
||||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
static int ShmSize;
|
||||||
|
static int GroupRank;
|
||||||
|
static int GroupSize;
|
||||||
|
static int WorldRank;
|
||||||
|
static int WorldSize;
|
||||||
|
|
||||||
// Wraps MPI_Cart routines
|
std::vector<int> WorldDims;
|
||||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
std::vector<int> GroupDims;
|
||||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
std::vector<int> ShmDims;
|
||||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
|
||||||
|
std::vector<int> GroupCoor;
|
||||||
|
std::vector<int> ShmCoor;
|
||||||
|
std::vector<int> WorldCoor;
|
||||||
|
|
||||||
/////////////////////////////////
|
static std::vector<int> GroupRanks;
|
||||||
// Grid information queries
|
static std::vector<int> MyGroup;
|
||||||
/////////////////////////////////
|
static int ShmSetup;
|
||||||
int IsBoss(void) { return _processor==0; };
|
static MPI_Win ShmWindow;
|
||||||
int BossRank(void) { return 0; };
|
static MPI_Comm ShmComm;
|
||||||
int ThisRank(void) { return _processor; };
|
|
||||||
const std::vector<int> & ThisProcessorCoor(void) { return _processor_coor; };
|
std::vector<int> LexicographicToWorldRank;
|
||||||
const std::vector<int> & ProcessorGrid(void) { return _processors; };
|
|
||||||
int ProcessorCount(void) { return _Nprocessors; };
|
static std::vector<void *> ShmCommBufs;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
#else
|
||||||
// Reduction
|
static void ShmInitGeneric(void);
|
||||||
////////////////////////////////////////////////////////////
|
static commVector<uint8_t> ShmBufStorageVector;
|
||||||
void GlobalSum(RealF &);
|
#endif
|
||||||
void GlobalSumVector(RealF *,int N);
|
|
||||||
|
|
||||||
void GlobalSum(RealD &);
|
/////////////////////////////////
|
||||||
void GlobalSumVector(RealD *,int N);
|
// Grid information and queries
|
||||||
|
// Implemented in Communicator_base.cc
|
||||||
|
/////////////////////////////////
|
||||||
|
static void * ShmCommBuf;
|
||||||
|
size_t heap_top;
|
||||||
|
size_t heap_bytes;
|
||||||
|
|
||||||
void GlobalSum(uint32_t &);
|
void *ShmBufferSelf(void);
|
||||||
void GlobalSum(uint64_t &);
|
void *ShmBuffer(int rank);
|
||||||
|
void *ShmBufferTranslate(int rank,void * local_p);
|
||||||
|
void *ShmBufferMalloc(size_t bytes);
|
||||||
|
void ShmBufferFreeAll(void) ;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
// Must call in Grid startup
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
static void Init(int *argc, char ***argv);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
// Constructor of any given grid
|
||||||
|
////////////////////////////////////////////////
|
||||||
|
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||||
|
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||||
|
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||||
|
|
||||||
|
int IsBoss(void) ;
|
||||||
|
int BossRank(void) ;
|
||||||
|
int ThisRank(void) ;
|
||||||
|
const std::vector<int> & ThisProcessorCoor(void) ;
|
||||||
|
const std::vector<int> & ProcessorGrid(void) ;
|
||||||
|
int ProcessorCount(void) ;
|
||||||
|
|
||||||
void GlobalSum(ComplexF &c)
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
{
|
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||||
GlobalSumVector((float *)&c,2);
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
}
|
static int RankWorld(void) ;
|
||||||
void GlobalSumVector(ComplexF *c,int N)
|
static void BroadcastWorld(int root,void* data, int bytes);
|
||||||
{
|
|
||||||
GlobalSumVector((float *)c,2*N);
|
////////////////////////////////////////////////////////////
|
||||||
}
|
// Reduction
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
void GlobalSum(RealF &);
|
||||||
|
void GlobalSumVector(RealF *,int N);
|
||||||
|
void GlobalSum(RealD &);
|
||||||
|
void GlobalSumVector(RealD *,int N);
|
||||||
|
void GlobalSum(uint32_t &);
|
||||||
|
void GlobalSum(uint64_t &);
|
||||||
|
void GlobalSum(ComplexF &c);
|
||||||
|
void GlobalSumVector(ComplexF *c,int N);
|
||||||
|
void GlobalSum(ComplexD &c);
|
||||||
|
void GlobalSumVector(ComplexD *c,int N);
|
||||||
|
|
||||||
|
template<class obj> void GlobalSum(obj &o){
|
||||||
|
typedef typename obj::scalar_type scalar_type;
|
||||||
|
int words = sizeof(obj)/sizeof(scalar_type);
|
||||||
|
scalar_type * ptr = (scalar_type *)& o;
|
||||||
|
GlobalSumVector(ptr,words);
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Face exchange, buffer swap in translational invariant way
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
void SendToRecvFrom(void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int xmit_to_rank,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int xmit_to_rank,
|
||||||
|
void *recv,
|
||||||
|
int recv_from_rank,
|
||||||
|
int bytes);
|
||||||
|
|
||||||
|
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||||
|
|
||||||
void GlobalSum(ComplexD &c)
|
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
{
|
void *xmit,
|
||||||
GlobalSumVector((double *)&c,2);
|
int xmit_to_rank,
|
||||||
}
|
void *recv,
|
||||||
void GlobalSumVector(ComplexD *c,int N)
|
int recv_from_rank,
|
||||||
{
|
int bytes);
|
||||||
GlobalSumVector((double *)c,2*N);
|
|
||||||
}
|
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||||
|
void StencilBarrier(void);
|
||||||
template<class obj> void GlobalSum(obj &o){
|
|
||||||
typedef typename obj::scalar_type scalar_type;
|
|
||||||
int words = sizeof(obj)/sizeof(scalar_type);
|
|
||||||
scalar_type * ptr = (scalar_type *)& o;
|
|
||||||
GlobalSumVector(ptr,words);
|
|
||||||
}
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
// Face exchange, buffer swap in translational invariant way
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
void SendToRecvFrom(void *xmit,
|
|
||||||
int xmit_to_rank,
|
|
||||||
void *recv,
|
|
||||||
int recv_from_rank,
|
|
||||||
int bytes);
|
|
||||||
|
|
||||||
void SendRecvPacket(void *xmit,
|
////////////////////////////////////////////////////////////
|
||||||
void *recv,
|
// Barrier
|
||||||
int xmit_to_rank,
|
////////////////////////////////////////////////////////////
|
||||||
int recv_from_rank,
|
void Barrier(void);
|
||||||
int bytes);
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
// Broadcast a buffer and composite larger
|
||||||
void *xmit,
|
////////////////////////////////////////////////////////////
|
||||||
int xmit_to_rank,
|
void Broadcast(int root,void* data, int bytes);
|
||||||
void *recv,
|
|
||||||
int recv_from_rank,
|
template<class obj> void Broadcast(int root,obj &data)
|
||||||
int bytes);
|
|
||||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
// Barrier
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
void Barrier(void);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
// Broadcast a buffer and composite larger
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
void Broadcast(int root,void* data, int bytes);
|
|
||||||
template<class obj> void Broadcast(int root,obj &data)
|
|
||||||
{
|
{
|
||||||
Broadcast(root,(void *)&data,sizeof(data));
|
Broadcast(root,(void *)&data,sizeof(data));
|
||||||
};
|
};
|
||||||
|
|
||||||
static void BroadcastWorld(int root,void* data, int bytes);
|
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,21 +30,23 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
// Should error check all MPI calls.
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Comm CartesianCommunicator::communicator_world;
|
||||||
|
|
||||||
|
// Should error check all MPI calls.
|
||||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||||
int flag;
|
int flag;
|
||||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
if ( !flag ) {
|
if ( !flag ) {
|
||||||
MPI_Init(argc,argv);
|
MPI_Init(argc,argv);
|
||||||
}
|
}
|
||||||
|
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||||
|
ShmInitGeneric();
|
||||||
}
|
}
|
||||||
|
|
||||||
int Rank(void) {
|
|
||||||
int pe;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
|
|
||||||
return pe;
|
|
||||||
}
|
|
||||||
|
|
||||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||||
{
|
{
|
||||||
_ndimension = processors.size();
|
_ndimension = processors.size();
|
||||||
@ -54,7 +56,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
|||||||
_processors = processors;
|
_processors = processors;
|
||||||
_processor_coor.resize(_ndimension);
|
_processor_coor.resize(_ndimension);
|
||||||
|
|
||||||
MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
||||||
MPI_Comm_rank(communicator,&_processor);
|
MPI_Comm_rank(communicator,&_processor);
|
||||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||||
|
|
||||||
@ -67,7 +69,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
|||||||
|
|
||||||
assert(Size==_Nprocessors);
|
assert(Size==_Nprocessors);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
@ -168,7 +169,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
|||||||
int nreq=list.size();
|
int nreq=list.size();
|
||||||
std::vector<MPI_Status> status(nreq);
|
std::vector<MPI_Status> status(nreq);
|
||||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||||
|
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -187,14 +187,22 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
|||||||
communicator);
|
communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Should only be used prior to Grid Init finished.
|
||||||
|
// Check for this?
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
// Returns the calling process's rank within communicator_world.
int CartesianCommunicator::RankWorld(void)
{
  int world_rank;
  MPI_Comm_rank(communicator_world, &world_rank);
  return world_rank;
}
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
{
|
{
|
||||||
int ierr= MPI_Bcast(data,
|
int ierr= MPI_Bcast(data,
|
||||||
bytes,
|
bytes,
|
||||||
MPI_BYTE,
|
MPI_BYTE,
|
||||||
root,
|
root,
|
||||||
MPI_COMM_WORLD);
|
communicator_world);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
580
lib/communicator/Communicator_mpi3.cc
Normal file
580
lib/communicator/Communicator_mpi3.cc
Normal file
@ -0,0 +1,580 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_mpi3.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
int CartesianCommunicator::ShmSetup = 0;
|
||||||
|
|
||||||
|
int CartesianCommunicator::ShmRank;
|
||||||
|
int CartesianCommunicator::ShmSize;
|
||||||
|
int CartesianCommunicator::GroupRank;
|
||||||
|
int CartesianCommunicator::GroupSize;
|
||||||
|
int CartesianCommunicator::WorldRank;
|
||||||
|
int CartesianCommunicator::WorldSize;
|
||||||
|
|
||||||
|
MPI_Comm CartesianCommunicator::communicator_world;
|
||||||
|
MPI_Comm CartesianCommunicator::ShmComm;
|
||||||
|
MPI_Win CartesianCommunicator::ShmWindow;
|
||||||
|
|
||||||
|
std::vector<int> CartesianCommunicator::GroupRanks;
|
||||||
|
std::vector<int> CartesianCommunicator::MyGroup;
|
||||||
|
std::vector<void *> CartesianCommunicator::ShmCommBufs;
|
||||||
|
|
||||||
|
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||||
|
{
|
||||||
|
return ShmCommBufs[ShmRank];
|
||||||
|
}
|
||||||
|
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||||
|
{
|
||||||
|
int gpeer = GroupRanks[rank];
|
||||||
|
if (gpeer == MPI_UNDEFINED){
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
return ShmCommBufs[gpeer];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
|
||||||
|
{
|
||||||
|
int gpeer = GroupRanks[rank];
|
||||||
|
if (gpeer == MPI_UNDEFINED){
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
|
||||||
|
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
|
||||||
|
return (void *) remote;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||||
|
int flag;
|
||||||
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
|
if ( !flag ) {
|
||||||
|
MPI_Init(argc,argv);
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||||
|
MPI_Comm_rank(communicator_world,&WorldRank);
|
||||||
|
MPI_Comm_size(communicator_world,&WorldSize);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// Split into groups that can share memory
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||||
|
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||||
|
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||||
|
GroupSize = WorldSize/ShmSize;
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||||
|
/////////////////////////////////////////////////////////////////////
|
||||||
|
MPI_Group WorldGroup, ShmGroup;
|
||||||
|
MPI_Comm_group (communicator_world, &WorldGroup);
|
||||||
|
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||||
|
|
||||||
|
std::vector<int> world_ranks(WorldSize);
|
||||||
|
GroupRanks.resize(WorldSize);
|
||||||
|
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||||
|
|
||||||
|
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// Identify who is in my group and noninate the leader
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
int g=0;
|
||||||
|
MyGroup.resize(ShmSize);
|
||||||
|
for(int rank=0;rank<WorldSize;rank++){
|
||||||
|
if(GroupRanks[rank]!=MPI_UNDEFINED){
|
||||||
|
assert(g<ShmSize);
|
||||||
|
MyGroup[g++] = rank;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
|
||||||
|
int myleader = MyGroup[0];
|
||||||
|
|
||||||
|
std::vector<int> leaders_1hot(WorldSize,0);
|
||||||
|
std::vector<int> leaders_group(GroupSize,0);
|
||||||
|
leaders_1hot [ myleader ] = 1;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// global sum leaders over comm world
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// find the group leaders world rank
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
int group=0;
|
||||||
|
for(int l=0;l<WorldSize;l++){
|
||||||
|
if(leaders_1hot[l]){
|
||||||
|
leaders_group[group++] = l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
// Identify the rank of the group in which I (and my leader) live
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
GroupRank=-1;
|
||||||
|
for(int g=0;g<GroupSize;g++){
|
||||||
|
if (myleader == leaders_group[g]){
|
||||||
|
GroupRank=g;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(GroupRank!=-1);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// allocate the shared window for our group
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
ShmCommBuf = 0;
|
||||||
|
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
|
||||||
|
assert(ierr==0);
|
||||||
|
// KNL hack -- force to numa-domain 1 in flat
|
||||||
|
#if 0
|
||||||
|
//#include <numaif.h>
|
||||||
|
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
|
||||||
|
void *pages = (void *) ( page + ShmCommBuf );
|
||||||
|
int status;
|
||||||
|
int flags=MPOL_MF_MOVE_ALL;
|
||||||
|
int nodes=1; // numa domain == MCDRAM
|
||||||
|
unsigned long count=1;
|
||||||
|
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
|
||||||
|
if (ierr && (page==0)) perror("numa relocate command failed");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
ShmCommBufs.resize(ShmSize);
|
||||||
|
for(int r=0;r<ShmSize;r++){
|
||||||
|
MPI_Aint sz;
|
||||||
|
int dsp_unit;
|
||||||
|
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Verbose for now
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
if (WorldRank == 0){
|
||||||
|
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
|
||||||
|
std::cout<< WorldSize << " Ranks " ;
|
||||||
|
std::cout<< GroupSize << " Nodes " ;
|
||||||
|
std::cout<< ShmSize << " with ranks-per-node "<<std::endl;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
|
||||||
|
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
|
||||||
|
|
||||||
|
for(int g=0;g<GroupSize;g++){
|
||||||
|
std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<<leaders_group[g]<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage<<" Boss Node Shm Pointers are {";
|
||||||
|
for(int g=0;g<ShmSize;g++){
|
||||||
|
std::cout<<std::hex<<ShmCommBufs[g]<<std::dec;
|
||||||
|
if(g!=ShmSize-1) std::cout<<",";
|
||||||
|
else std::cout<<"}"<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int g=0;g<GroupSize;g++){
|
||||||
|
if ( (ShmRank == 0) && (GroupRank==g) ) std::cout<<GridLogMessage<<"["<<g<<"] Node Group "<<g<<" is ranks {";
|
||||||
|
for(int r=0;r<ShmSize;r++){
|
||||||
|
if ( (ShmRank == 0) && (GroupRank==g) ) {
|
||||||
|
std::cout<<MyGroup[r];
|
||||||
|
if(r<ShmSize-1) std::cout<<",";
|
||||||
|
else std::cout<<"}"<<std::endl;
|
||||||
|
}
|
||||||
|
MPI_Barrier(communicator_world);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(ShmSetup==0); ShmSetup=1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Want to implement some magic ... Group sub-cubes into those on same node
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
|
{
|
||||||
|
std::vector<int> coor = _processor_coor;
|
||||||
|
|
||||||
|
assert(std::abs(shift) <_processors[dim]);
|
||||||
|
|
||||||
|
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
|
||||||
|
Lexicographic::IndexFromCoor(coor,source,_processors);
|
||||||
|
source = LexicographicToWorldRank[source];
|
||||||
|
|
||||||
|
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
|
||||||
|
Lexicographic::IndexFromCoor(coor,dest,_processors);
|
||||||
|
dest = LexicographicToWorldRank[dest];
|
||||||
|
}
|
||||||
|
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
int rank;
|
||||||
|
Lexicographic::IndexFromCoor(coor,rank,_processors);
|
||||||
|
rank = LexicographicToWorldRank[rank];
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
Lexicographic::CoorFromIndex(coor,rank,_processors);
|
||||||
|
rank = LexicographicToWorldRank[rank];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the processor-grid view for the layout `processors` (one extent per
// dimension) on top of the state prepared by Init().
//
// The ranks of one shared-memory group are arranged as a sub-block (ShmDims)
// of the processor grid, the groups themselves as a coarser grid (GroupDims),
// and LexicographicToWorldRank is filled so that a lexicographic processor
// index can be translated to a world rank.
//
// Asserts: product of `processors` equals WorldSize; ShmSize is a power of two
// no larger than 2^MAXLOG2RANKSPERNODE; every grid extent is divisible by the
// sub-block extent chosen for it.
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  int ierr;

  communicator=communicator_world;

  _ndimension = processors.size();

  ////////////////////////////////////////////////////////////////
  // Check processor counts match.
  // Done before the subdivision below: if the grid holds fewer than ShmSize
  // processors the while loop there can never find a dimension to split and
  // spins forever, so the mismatch would never be reported.
  ////////////////////////////////////////////////////////////////
  _Nprocessors=1;
  _processors = processors;
  _processor_coor.resize(_ndimension);
  for(int i=0;i<_ndimension;i++){
    _Nprocessors*=_processors[i];
  }
  assert(WorldSize==_Nprocessors);

  ////////////////////////////////////////////////////////////////
  // Assert power of two shm_size.
  ////////////////////////////////////////////////////////////////
  int log2size = -1;
  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
    if ( (0x1<<i) == ShmSize ) {
      log2size = i;
      break;
    }
  }
  assert(log2size != -1);

  ////////////////////////////////////////////////////////////////
  // Identify subblock of ranks on node spreading across dims
  // in a maximally symmetrical way
  ////////////////////////////////////////////////////////////////
  int dim = 0;

  std::vector<int> WorldDims = processors;

  ShmDims.resize(_ndimension,1);
  GroupDims.resize(_ndimension);

  ShmCoor.resize(_ndimension);
  GroupCoor.resize(_ndimension);
  WorldCoor.resize(_ndimension);

  // Double one sub-block extent per factor of two in ShmSize, cycling through
  // the dimensions and skipping any that cannot be split further.
  for(int l2=0;l2<log2size;l2++){
    while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
    ShmDims[dim]*=2;
    dim=(dim+1)%_ndimension;
  }

  ////////////////////////////////////////////////////////////////
  // Establish torus of processes and nodes with sub-blockings
  ////////////////////////////////////////////////////////////////
  for(int d=0;d<_ndimension;d++){
    // A remainder here would leave coordinates no (group,shm) pair maps to,
    // so distinct ranks would collide in LexicographicToWorldRank below.
    assert(WorldDims[d]%ShmDims[d]==0);
    GroupDims[d] = WorldDims[d]/ShmDims[d];
  }

  ////////////////////////////////////////////////////////////////
  // Establish mapping between lexico physics coord and WorldRank
  ////////////////////////////////////////////////////////////////
  LexicographicToWorldRank.resize(WorldSize,0);
  Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
  Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
  for(int d=0;d<_ndimension;d++){
    WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
  }
  _processor_coor = WorldCoor;

  int lexico;
  Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
  LexicographicToWorldRank[lexico]=WorldRank;
  _processor = lexico;

  ///////////////////////////////////////////////////////////////////
  // global sum Lexico to World mapping
  // (each rank contributed only its own entry; all others are zero)
  ///////////////////////////////////////////////////////////////////
  ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
  assert(ierr==0);
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(float &f){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(double &d)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> reqs(0);
|
||||||
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
|
SendToRecvFromComplete(reqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int sender,
|
||||||
|
int receiver,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Status stat;
|
||||||
|
assert(sender != receiver);
|
||||||
|
int tag = sender;
|
||||||
|
if ( _processor == sender ) {
|
||||||
|
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||||
|
}
|
||||||
|
if ( _processor == receiver ) {
|
||||||
|
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
this->StencilBarrier();
|
||||||
|
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
|
||||||
|
static int sequence;
|
||||||
|
|
||||||
|
int ierr;
|
||||||
|
int tag;
|
||||||
|
int check;
|
||||||
|
|
||||||
|
assert(dest != _processor);
|
||||||
|
assert(from != _processor);
|
||||||
|
|
||||||
|
int gdest = GroupRanks[dest];
|
||||||
|
int gfrom = GroupRanks[from];
|
||||||
|
int gme = GroupRanks[_processor];
|
||||||
|
|
||||||
|
sequence++;
|
||||||
|
|
||||||
|
char *from_ptr = (char *)ShmCommBufs[ShmRank];
|
||||||
|
|
||||||
|
int small = (bytes<MAX_MPI_SHM_BYTES);
|
||||||
|
|
||||||
|
typedef uint64_t T;
|
||||||
|
int words = bytes/sizeof(T);
|
||||||
|
|
||||||
|
assert(((size_t)bytes &(sizeof(T)-1))==0);
|
||||||
|
assert(gme == ShmRank);
|
||||||
|
|
||||||
|
if ( small && (gdest !=MPI_UNDEFINED) ) {
|
||||||
|
|
||||||
|
char *to_ptr = (char *)ShmCommBufs[gdest];
|
||||||
|
|
||||||
|
assert(gme != gdest);
|
||||||
|
|
||||||
|
T *ip = (T *)xmit;
|
||||||
|
T *op = (T *)to_ptr;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int w=0;w<words;w++) {
|
||||||
|
op[w]=ip[w];
|
||||||
|
}
|
||||||
|
|
||||||
|
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
|
||||||
|
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
|
||||||
|
} else {
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(xrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->StencilBarrier();
|
||||||
|
|
||||||
|
if (small && (gfrom !=MPI_UNDEFINED) ) {
|
||||||
|
T *ip = (T *)from_ptr;
|
||||||
|
T *op = (T *)recv;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int w=0;w<words;w++) {
|
||||||
|
op[w]=ip[w];
|
||||||
|
}
|
||||||
|
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
|
||||||
|
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
|
||||||
|
assert(check==sequence);
|
||||||
|
assert(tag==from);
|
||||||
|
} else {
|
||||||
|
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(rrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->StencilBarrier();
|
||||||
|
|
||||||
|
#else
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
int rank = _processor;
|
||||||
|
int ierr;
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
|
||||||
|
assert(ierr==0);
|
||||||
|
|
||||||
|
list.push_back(xrq);
|
||||||
|
list.push_back(rrq);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
|
||||||
|
int ierr;
|
||||||
|
|
||||||
|
assert(dest != _processor);
|
||||||
|
assert(from != _processor);
|
||||||
|
|
||||||
|
int gdest = GroupRanks[dest];
|
||||||
|
int gfrom = GroupRanks[from];
|
||||||
|
int gme = GroupRanks[_processor];
|
||||||
|
|
||||||
|
assert(gme == ShmRank);
|
||||||
|
|
||||||
|
if ( gdest == MPI_UNDEFINED ) {
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(xrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( gfrom ==MPI_UNDEFINED) {
|
||||||
|
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
assert(ierr==0);
|
||||||
|
list.push_back(rrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
SendToRecvFromComplete(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilBarrier(void)
|
||||||
|
{
|
||||||
|
MPI_Win_sync (ShmWindow);
|
||||||
|
MPI_Barrier (ShmComm);
|
||||||
|
MPI_Win_sync (ShmWindow);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
int nreq=list.size();
|
||||||
|
std::vector<MPI_Status> status(nreq);
|
||||||
|
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Barrier(void)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Barrier(communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr= MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
874
lib/communicator/Communicator_mpi3_leader.cc
Normal file
874
lib/communicator/Communicator_mpi3_leader.cc
Normal file
@ -0,0 +1,874 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/communicator/Communicator_mpi3_leader.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include "Grid.h"
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Workarounds:
|
||||||
|
/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix.
|
||||||
|
/// darwin dispatch semaphores don't seem to be multiprocess.
|
||||||
|
///
|
||||||
|
/// ii) openmpi under --mca shmem posix works with two squadrons per node;
|
||||||
|
/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME
|
||||||
|
/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI.
|
||||||
|
///
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#include <semaphore.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
// Handle to a POSIX named semaphore.
typedef sem_t *Grid_semaphore;

// Semaphore helpers. SEM_INIT / SEM_INIT_EXCL read a variable called
// `sem_name` that must be in scope where the macro is expanded.
//
// Each macro expands to one braced block, so it stays a single statement.
// SEM_POST / SEM_WAIT perform the call outside assert(): with NDEBUG defined
// assert() discards its argument, which previously removed the semaphore
// operation itself.

// Open an existing semaphore named sem_name.
#define SEM_INIT(S)      { S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); }

// Remove any semaphore of that name, then create it with initial value 0.
#define SEM_INIT_EXCL(S) { sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); }

#define SEM_POST(S)      { int sem_rc_ = sem_post(S); assert ( sem_rc_ == 0 ); (void)sem_rc_; }

#define SEM_WAIT(S)      { int sem_rc_ = sem_wait(S); assert ( sem_rc_ == 0 ); (void)sem_rc_; }
|
||||||
|
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
|
||||||
|
// Command codes stored in Descriptor::command.
enum {
  COMMAND_ISEND,    // queued by MPIoffloadEngine::QueueSend
  COMMAND_IRECV,    // queued by MPIoffloadEngine::QueueRecv
  COMMAND_WAITALL   // queued by Slave::WaitAll
};
|
||||||
|
|
||||||
|
struct Descriptor {
|
||||||
|
uint64_t buf;
|
||||||
|
size_t bytes;
|
||||||
|
int rank;
|
||||||
|
int tag;
|
||||||
|
int command;
|
||||||
|
MPI_Request request;
|
||||||
|
};
|
||||||
|
|
||||||
|
const int pool = 48;
|
||||||
|
|
||||||
|
class SlaveState {
|
||||||
|
public:
|
||||||
|
volatile int head;
|
||||||
|
volatile int start;
|
||||||
|
volatile int tail;
|
||||||
|
volatile Descriptor Descrs[pool];
|
||||||
|
};
|
||||||
|
|
||||||
|
class Slave {
|
||||||
|
public:
|
||||||
|
Grid_semaphore sem_head;
|
||||||
|
Grid_semaphore sem_tail;
|
||||||
|
SlaveState *state;
|
||||||
|
MPI_Comm squadron;
|
||||||
|
uint64_t base;
|
||||||
|
int universe_rank;
|
||||||
|
int vertical_rank;
|
||||||
|
char sem_name [NAME_MAX];
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
// Descriptor circular pointers
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
Slave() {};
|
||||||
|
|
||||||
|
void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank);
|
||||||
|
|
||||||
|
void SemInit(void) {
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
|
||||||
|
// printf("SEM_NAME: %s \n",sem_name);
|
||||||
|
SEM_INIT(sem_head);
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
|
||||||
|
// printf("SEM_NAME: %s \n",sem_name);
|
||||||
|
SEM_INIT(sem_tail);
|
||||||
|
}
|
||||||
|
void SemInitExcl(void) {
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
|
||||||
|
// printf("SEM_INIT_EXCL: %s \n",sem_name);
|
||||||
|
SEM_INIT_EXCL(sem_head);
|
||||||
|
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
|
||||||
|
// printf("SEM_INIT_EXCL: %s \n",sem_name);
|
||||||
|
SEM_INIT_EXCL(sem_tail);
|
||||||
|
}
|
||||||
|
void WakeUpDMA(void) {
|
||||||
|
SEM_POST(sem_head);
|
||||||
|
};
|
||||||
|
void WakeUpCompute(void) {
|
||||||
|
SEM_POST(sem_tail);
|
||||||
|
};
|
||||||
|
void WaitForCommand(void) {
|
||||||
|
SEM_WAIT(sem_head);
|
||||||
|
};
|
||||||
|
void WaitForComplete(void) {
|
||||||
|
SEM_WAIT(sem_tail);
|
||||||
|
};
|
||||||
|
void EventLoop (void) {
|
||||||
|
// std::cout<< " Entering event loop "<<std::endl;
|
||||||
|
while(1){
|
||||||
|
WaitForCommand();
|
||||||
|
// std::cout << "Getting command "<<std::endl;
|
||||||
|
Event();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int Event (void) ;
|
||||||
|
|
||||||
|
uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ;
|
||||||
|
|
||||||
|
void WaitAll() {
|
||||||
|
// std::cout << "Queueing WAIT command "<<std::endl;
|
||||||
|
QueueCommand(COMMAND_WAITALL,0,0,0,squadron,0);
|
||||||
|
// std::cout << "Waking up DMA "<<std::endl;
|
||||||
|
WakeUpDMA();
|
||||||
|
// std::cout << "Waiting from semaphore "<<std::endl;
|
||||||
|
WaitForComplete();
|
||||||
|
// std::cout << "Checking FIFO is empty "<<std::endl;
|
||||||
|
assert ( state->tail == state->head );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// One instance of a data mover.
|
||||||
|
// Master and Slave must agree on location in shared memory
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
class MPIoffloadEngine {
|
||||||
|
public:
|
||||||
|
|
||||||
|
static std::vector<Slave> Slaves;
|
||||||
|
|
||||||
|
static int ShmSetup;
|
||||||
|
|
||||||
|
static int UniverseRank;
|
||||||
|
static int UniverseSize;
|
||||||
|
|
||||||
|
static MPI_Comm communicator_universe;
|
||||||
|
static MPI_Comm communicator_cached;
|
||||||
|
|
||||||
|
static MPI_Comm HorizontalComm;
|
||||||
|
static int HorizontalRank;
|
||||||
|
static int HorizontalSize;
|
||||||
|
|
||||||
|
static MPI_Comm VerticalComm;
|
||||||
|
static MPI_Win VerticalWindow;
|
||||||
|
static int VerticalSize;
|
||||||
|
static int VerticalRank;
|
||||||
|
|
||||||
|
static std::vector<void *> VerticalShmBufs;
|
||||||
|
static std::vector<std::vector<int> > UniverseRanks;
|
||||||
|
static std::vector<int> UserCommunicatorToWorldRanks;
|
||||||
|
|
||||||
|
static MPI_Group WorldGroup, CachedGroup;
|
||||||
|
|
||||||
|
static void CommunicatorInit (MPI_Comm &communicator_world,
|
||||||
|
MPI_Comm &ShmComm,
|
||||||
|
void * &ShmCommBuf);
|
||||||
|
|
||||||
|
static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
// routines for master proc must handle any communicator
|
||||||
|
/////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
// std::cout<< " Queueing send "<< bytes<< " slave "<< slave << " to comm "<<rank <<std::endl;
|
||||||
|
Slaves[slave].QueueCommand(COMMAND_ISEND,buf,bytes,tag,comm,rank);
|
||||||
|
// std::cout << "Queued send command to rank "<< rank<< " via "<<slave <<std::endl;
|
||||||
|
Slaves[slave].WakeUpDMA();
|
||||||
|
// std::cout << "Waking up DMA "<< slave<<std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void QueueRecv(int slave, void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
// std::cout<< " Queueing recv "<< bytes<< " slave "<< slave << " from comm "<<rank <<std::endl;
|
||||||
|
Slaves[slave].QueueCommand(COMMAND_IRECV,buf,bytes,tag,comm,rank);
|
||||||
|
// std::cout << "Queued recv command from rank "<< rank<< " via "<<slave <<std::endl;
|
||||||
|
Slaves[slave].WakeUpDMA();
|
||||||
|
// std::cout << "Waking up DMA "<< slave<<std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void WaitAll() {
|
||||||
|
for(int s=1;s<VerticalSize;s++) {
|
||||||
|
// std::cout << "Waiting for slave "<< s<<std::endl;
|
||||||
|
Slaves[s].WaitAll();
|
||||||
|
}
|
||||||
|
// std::cout << " Wait all Complete "<<std::endl;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){
|
||||||
|
int basework = nwork/units;
|
||||||
|
int backfill = units-(nwork%units);
|
||||||
|
if ( me >= units ) {
|
||||||
|
mywork = myoff = 0;
|
||||||
|
} else {
|
||||||
|
mywork = (nwork+me)/units;
|
||||||
|
myoff = basework * me;
|
||||||
|
if ( me > backfill )
|
||||||
|
myoff+= (me-backfill);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void QueueMultiplexedSend(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
uint8_t * cbuf = (uint8_t *) buf;
|
||||||
|
int mywork, myoff, procs;
|
||||||
|
procs = VerticalSize-1;
|
||||||
|
for(int s=0;s<procs;s++) {
|
||||||
|
GetWork(bytes,s,mywork,myoff,procs);
|
||||||
|
QueueSend(s+1,&cbuf[myoff],mywork,tag,comm,rank);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void QueueMultiplexedRecv(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
|
||||||
|
uint8_t * cbuf = (uint8_t *) buf;
|
||||||
|
int mywork, myoff, procs;
|
||||||
|
procs = VerticalSize-1;
|
||||||
|
for(int s=0;s<procs;s++) {
|
||||||
|
GetWork(bytes,s,mywork,myoff,procs);
|
||||||
|
QueueRecv(s+1,&cbuf[myoff],mywork,tag,comm,rank);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
std::vector<Slave> MPIoffloadEngine::Slaves;
|
||||||
|
|
||||||
|
int MPIoffloadEngine::UniverseRank;
|
||||||
|
int MPIoffloadEngine::UniverseSize;
|
||||||
|
|
||||||
|
MPI_Comm MPIoffloadEngine::communicator_universe;
|
||||||
|
MPI_Comm MPIoffloadEngine::communicator_cached;
|
||||||
|
MPI_Group MPIoffloadEngine::WorldGroup;
|
||||||
|
MPI_Group MPIoffloadEngine::CachedGroup;
|
||||||
|
|
||||||
|
MPI_Comm MPIoffloadEngine::HorizontalComm;
|
||||||
|
int MPIoffloadEngine::HorizontalRank;
|
||||||
|
int MPIoffloadEngine::HorizontalSize;
|
||||||
|
|
||||||
|
MPI_Comm MPIoffloadEngine::VerticalComm;
|
||||||
|
int MPIoffloadEngine::VerticalSize;
|
||||||
|
int MPIoffloadEngine::VerticalRank;
|
||||||
|
MPI_Win MPIoffloadEngine::VerticalWindow;
|
||||||
|
std::vector<void *> MPIoffloadEngine::VerticalShmBufs;
|
||||||
|
std::vector<std::vector<int> > MPIoffloadEngine::UniverseRanks;
|
||||||
|
std::vector<int> MPIoffloadEngine::UserCommunicatorToWorldRanks;
|
||||||
|
|
||||||
|
int MPIoffloadEngine::ShmSetup = 0;
|
||||||
|
|
||||||
|
// One-time set-up of the offload engine.
//
// Splits MPI_COMM_WORLD into "vertical" groups of ranks that can share memory
// and "horizontal" groups of equal vertical rank, maps one named POSIX shared
// memory segment per vertical rank, and starts the slave data movers.
//
//   communicator_world : out, set to the horizontal communicator
//   ShmComm            : out, set to the vertical communicator
//   ShmCommBuf         : out, set to the buffer of vertical rank 0
//
// Ranks with VerticalRank != 0 enter Slave::EventLoop and are not expected to
// return from this call.  Must be called exactly once (asserts ShmSetup==0).
void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
                                         MPI_Comm &ShmComm,
                                         void * &ShmCommBuf)
{
  assert(ShmSetup==0);

  //////////////////////////////////////////////////////////////////////
  // Universe is all nodes prior to squadron grouping
  //////////////////////////////////////////////////////////////////////
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe);
  MPI_Comm_rank(communicator_universe,&UniverseRank);
  MPI_Comm_size(communicator_universe,&UniverseSize);

  /////////////////////////////////////////////////////////////////////
  // Split into groups that can share memory (Verticals)
  /////////////////////////////////////////////////////////////////////
#undef MPI_SHARED_MEM_DEBUG
#ifdef MPI_SHARED_MEM_DEBUG
  MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm);
#else
  MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm);
#endif
  MPI_Comm_rank(VerticalComm ,&VerticalRank);
  MPI_Comm_size(VerticalComm ,&VerticalSize);

  //////////////////////////////////////////////////////////////////////
  // Split into horizontal groups by rank in squadron
  //////////////////////////////////////////////////////////////////////
  MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm);
  MPI_Comm_rank(HorizontalComm,&HorizontalRank);
  MPI_Comm_size(HorizontalComm,&HorizontalSize);
  assert(HorizontalSize*VerticalSize==UniverseSize);

  ////////////////////////////////////////////////////////////////////////////////
  // What is my place in the world
  ////////////////////////////////////////////////////////////////////////////////
  // Only the vertical leader contributes, so the sum over the vertical group
  // hands the leader's horizontal rank to every rank of that group.
  int WorldRank=0;
  if(VerticalRank==0) WorldRank = HorizontalRank;
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm);
  assert(ierr==0);

  ////////////////////////////////////////////////////////////////////////////////
  // Where is the world in the universe?
  ////////////////////////////////////////////////////////////////////////////////
  // Each rank fills in its own entry; summing over the universe completes the table.
  UniverseRanks = std::vector<std::vector<int> >(HorizontalSize,std::vector<int>(VerticalSize,0));
  UniverseRanks[WorldRank][VerticalRank] = UniverseRank;
  for(int w=0;w<HorizontalSize;w++){
    ierr=MPI_Allreduce(MPI_IN_PLACE,&UniverseRanks[w][0],VerticalSize,MPI_INT,MPI_SUM,communicator_universe);
    assert(ierr==0);
  }

  //////////////////////////////////////////////////////////////////////////////////////////////////////////
  // allocate the shared window for our group, pass back Shm info to CartesianCommunicator
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
  VerticalShmBufs.resize(VerticalSize);

#undef MPI_SHARED_MEM
#ifdef MPI_SHARED_MEM
  ierr = MPI_Win_allocate_shared(CartesianCommunicator::MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,VerticalComm,&ShmCommBuf,&VerticalWindow);
  ierr|= MPI_Win_lock_all (MPI_MODE_NOCHECK, VerticalWindow);
  assert(ierr==0);

  for(int r=0;r<VerticalSize;r++){
    MPI_Aint sz;
    int dsp_unit;
    MPI_Win_shared_query (VerticalWindow, r, &sz, &dsp_unit, &VerticalShmBufs[r]);
  }
#else
  char shm_name [NAME_MAX];
  MPI_Barrier(VerticalComm);

  // The vertical leader creates and sizes every segment first ...
  if ( VerticalRank == 0 ) {
    for(int r=0;r<VerticalSize;r++){

      // Segment 0 is the communication buffer; the rest hold one SlaveState each.
      size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
      if ( r>0 ) size = sizeof(SlaveState);

      snprintf(shm_name,sizeof(shm_name),"/Grid_mpi3_shm_%d_%d",WorldRank,r);

      // Discard any segment of the same name left behind earlier.
      shm_unlink(shm_name);

      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600);
      if ( fd < 0 ) {
        perror("failed shm_open");
        assert(0);
      }

      if ( ftruncate(fd, size) != 0 ) {
        perror("failed ftruncate");
        assert(0);
      }

      VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

      if ( VerticalShmBufs[r] == MAP_FAILED ) {
        perror("failed mmap");
        assert(0);
      }
      // The mapping stays valid after the descriptor is closed.
      close(fd);

      // Stamp the segment so the other ranks can verify they mapped the right one.
      uint64_t * check = (uint64_t *) VerticalShmBufs[r];
      check[0] = WorldRank;
      check[1] = r;
    }
  }

  MPI_Barrier(VerticalComm);

  // ... then every other rank of the vertical group attaches to them.
  if ( VerticalRank != 0 ) {
    for(int r=0;r<VerticalSize;r++){

      size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES ;
      if ( r>0 ) size = sizeof(SlaveState);

      snprintf(shm_name,sizeof(shm_name),"/Grid_mpi3_shm_%d_%d",WorldRank,r);

      int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600);
      if ( fd<0 ) {
        perror("failed shm_open");
        assert(0);
      }
      VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      if ( VerticalShmBufs[r] == MAP_FAILED ) {
        perror("failed mmap");
        assert(0);
      }
      close(fd);

      // Verify the stamp written by the leader.
      uint64_t * check = (uint64_t *) VerticalShmBufs[r];
      assert(check[0]== WorldRank);
      assert(check[1]== r);
      std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
    }
  }
#endif
  MPI_Barrier(VerticalComm);

  //////////////////////////////////////////////////////////////////////
  // Hand the horizontal communicator back as the caller's world, and the
  // vertical communicator and rank-0 buffer as its shared-memory view.
  //////////////////////////////////////////////////////////////////////
  communicator_world = HorizontalComm;
  ShmComm            = VerticalComm;
  ShmCommBuf         = VerticalShmBufs[0];
  MPI_Comm_group (communicator_world, &WorldGroup);

  ///////////////////////////////////////////////////////////
  // Start the slave data movers
  ///////////////////////////////////////////////////////////
  // Both branches execute two barriers on VerticalComm, matching each other.
  if ( VerticalRank != 0 ) {
    Slave indentured;
    indentured.Init( (SlaveState *) VerticalShmBufs[VerticalRank], VerticalComm, UniverseRank,VerticalRank);
    indentured.SemInitExcl();// init semaphore in shared memory
    MPI_Barrier(VerticalComm);
    MPI_Barrier(VerticalComm);
    indentured.EventLoop();
    assert(0); // the event loop is not expected to return
  } else {
    Slaves.resize(VerticalSize);
    for(int i=1;i<VerticalSize;i++){
      Slaves[i].Init((SlaveState *)VerticalShmBufs[i],VerticalComm, UniverseRanks[HorizontalRank][i],i);
    }
    MPI_Barrier(VerticalComm);
    for(int i=1;i<VerticalSize;i++){
      Slaves[i].SemInit();// init semaphore in shared memory
    }
    MPI_Barrier(VerticalComm);
  }

  ///////////////////////////////////////////////////////////
  // Verbose for now
  ///////////////////////////////////////////////////////////

  ShmSetup=1;

  if (UniverseRank == 0){

    std::cout<<GridLogMessage << "Grid MPI-3 configuration: detected ";
    std::cout<<UniverseSize   << " Ranks " ;
    std::cout<<HorizontalSize << " Nodes " ;
    std::cout<<VerticalSize   << " with ranks-per-node "<<std::endl;

    std::cout<<GridLogMessage << "Grid MPI-3 configuration: using one lead process per node " << std::endl;
    std::cout<<GridLogMessage << "Grid MPI-3 configuration: reduced communicator has size " << HorizontalSize << std::endl;

    for(int g=0;g<HorizontalSize;g++){
      std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<< UniverseRanks[g][0]<<std::endl;
    }

    for(int g=0;g<HorizontalSize;g++){
      std::cout<<GridLogMessage<<" { ";
      for(int s=0;s<VerticalSize;s++){
        std::cout<< UniverseRanks[g][s];
        if ( s<VerticalSize-1 ) {
          std::cout<<",";
        }
      }
      std::cout<<" } "<<std::endl;
    }
  }
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Map the communicator into communicator_world, and find the neighbour.
|
||||||
|
// Cache the mappings; cache size is 1.
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
void MPIoffloadEngine::MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int rank) {
|
||||||
|
|
||||||
|
if ( comm == HorizontalComm ) {
|
||||||
|
comm_world_peer = rank;
|
||||||
|
// std::cout << " MapCommRankToWorldRank horiz " <<rank<<"->"<<comm_world_peer<<std::endl;
|
||||||
|
} else if ( comm == communicator_cached ) {
|
||||||
|
comm_world_peer = UserCommunicatorToWorldRanks[rank];
|
||||||
|
// std::cout << " MapCommRankToWorldRank cached " <<rank<<"->"<<comm_world_peer<<std::endl;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int size;
|
||||||
|
|
||||||
|
MPI_Comm_size(comm,&size);
|
||||||
|
|
||||||
|
UserCommunicatorToWorldRanks.resize(size);
|
||||||
|
|
||||||
|
std::vector<int> cached_ranks(size);
|
||||||
|
|
||||||
|
for(int r=0;r<size;r++) {
|
||||||
|
cached_ranks[r]=r;
|
||||||
|
}
|
||||||
|
|
||||||
|
communicator_cached=comm;
|
||||||
|
|
||||||
|
MPI_Comm_group(communicator_cached, &CachedGroup);
|
||||||
|
|
||||||
|
MPI_Group_translate_ranks(CachedGroup,size,&cached_ranks[0],WorldGroup, &UserCommunicatorToWorldRanks[0]);
|
||||||
|
|
||||||
|
comm_world_peer = UserCommunicatorToWorldRanks[rank];
|
||||||
|
// std::cout << " MapCommRankToWorldRank cache miss " <<rank<<"->"<<comm_world_peer<<std::endl;
|
||||||
|
|
||||||
|
assert(comm_world_peer != MPI_UNDEFINED);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert( (tag & (~0xFFFFL)) ==0);
|
||||||
|
|
||||||
|
uint64_t icomm = (uint64_t)comm;
|
||||||
|
int comm_hash = ((icomm>>0 )&0xFFFF)^((icomm>>16)&0xFFFF)
|
||||||
|
^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF);
|
||||||
|
|
||||||
|
// hashtag = (comm_hash<<15) | tag;
|
||||||
|
hashtag = tag;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// Bind this Slave to its shared SlaveState and record where it sits.
//
//   _state         : command FIFO state for this slave
//   _squadron      : communicator stored as `squadron`
//   _universe_rank : stored as `universe_rank`
//   _vertical_rank : stored as `vertical_rank`
//
// Resets the FIFO indices to zero and records the address of vertical buffer 0
// as `base`, the origin against which descriptor buffer offsets are taken.
void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank)
{
  squadron      = _squadron;
  universe_rank = _universe_rank;
  vertical_rank = _vertical_rank;
  state         = _state;

  // Empty FIFO: head, tail and start all coincide.
  state->head = state->tail = state->start = 0;

  base = reinterpret_cast<uint64_t>(MPIoffloadEngine::VerticalShmBufs[0]);
}
|
||||||
|
// Next index in the circular FIFO of `pool` entries; the argument is
// parenthesised so that compound expressions expand correctly.
#define PERI_PLUS(A) ( ((A)+1)%pool )
|
||||||
|
int Slave::Event (void) {
|
||||||
|
|
||||||
|
static int tail_last;
|
||||||
|
static int head_last;
|
||||||
|
static int start_last;
|
||||||
|
int ierr;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
// Try to advance the start pointers
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
int s=state->start;
|
||||||
|
if ( s != state->head ) {
|
||||||
|
switch ( state->Descrs[s].command ) {
|
||||||
|
case COMMAND_ISEND:
|
||||||
|
/*
|
||||||
|
std::cout<< " Send "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
|
||||||
|
<< " to " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
|
||||||
|
<< " Comm " << MPIoffloadEngine::communicator_universe<< " me " <<universe_rank<< std::endl;
|
||||||
|
*/
|
||||||
|
ierr = MPI_Isend((void *)(state->Descrs[s].buf+base),
|
||||||
|
state->Descrs[s].bytes,
|
||||||
|
MPI_CHAR,
|
||||||
|
state->Descrs[s].rank,
|
||||||
|
state->Descrs[s].tag,
|
||||||
|
MPIoffloadEngine::communicator_universe,
|
||||||
|
(MPI_Request *)&state->Descrs[s].request);
|
||||||
|
assert(ierr==0);
|
||||||
|
state->start = PERI_PLUS(s);
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case COMMAND_IRECV:
|
||||||
|
/*
|
||||||
|
std::cout<< " Recv "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
|
||||||
|
<< " from " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
|
||||||
|
<< " Comm " << MPIoffloadEngine::communicator_universe<< " me "<< universe_rank<< std::endl;
|
||||||
|
*/
|
||||||
|
ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base),
|
||||||
|
state->Descrs[s].bytes,
|
||||||
|
MPI_CHAR,
|
||||||
|
state->Descrs[s].rank,
|
||||||
|
state->Descrs[s].tag,
|
||||||
|
MPIoffloadEngine::communicator_universe,
|
||||||
|
(MPI_Request *)&state->Descrs[s].request);
|
||||||
|
|
||||||
|
// std::cout<< " Request is "<<state->Descrs[s].request<<std::endl;
|
||||||
|
// std::cout<< " Request0 is "<<state->Descrs[0].request<<std::endl;
|
||||||
|
assert(ierr==0);
|
||||||
|
state->start = PERI_PLUS(s);
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case COMMAND_WAITALL:
|
||||||
|
|
||||||
|
for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){
|
||||||
|
MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
|
||||||
|
};
|
||||||
|
s=PERI_PLUS(s);
|
||||||
|
state->start = s;
|
||||||
|
state->tail = s;
|
||||||
|
|
||||||
|
WakeUpCompute();
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// External interaction with the queue
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank)
|
||||||
|
{
|
||||||
|
/////////////////////////////////////////
|
||||||
|
// Spin; if FIFO is full until not full
|
||||||
|
/////////////////////////////////////////
|
||||||
|
int head =state->head;
|
||||||
|
int next = PERI_PLUS(head);
|
||||||
|
|
||||||
|
// Set up descriptor
|
||||||
|
int worldrank;
|
||||||
|
int hashtag;
|
||||||
|
MPI_Comm communicator;
|
||||||
|
MPI_Request request;
|
||||||
|
|
||||||
|
MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank);
|
||||||
|
|
||||||
|
uint64_t relative= (uint64_t)buf - base;
|
||||||
|
state->Descrs[head].buf = relative;
|
||||||
|
state->Descrs[head].bytes = bytes;
|
||||||
|
state->Descrs[head].rank = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
|
||||||
|
state->Descrs[head].tag = hashtag;
|
||||||
|
state->Descrs[head].command= command;
|
||||||
|
|
||||||
|
/*
|
||||||
|
if ( command == COMMAND_ISEND ) {
|
||||||
|
std::cout << "QueueSend from "<< universe_rank <<" to commrank " << commrank
|
||||||
|
<< " to worldrank " << worldrank <<std::endl;
|
||||||
|
std::cout << " via VerticalRank "<< vertical_rank <<" to universerank " << MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]<<std::endl;
|
||||||
|
std::cout << " QueueCommand "<<buf<<"["<<bytes<<"]" << std::endl;
|
||||||
|
}
|
||||||
|
if ( command == COMMAND_IRECV ) {
|
||||||
|
std::cout << "QueueRecv on "<< universe_rank <<" from commrank " << commrank
|
||||||
|
<< " from worldrank " << worldrank <<std::endl;
|
||||||
|
std::cout << " via VerticalRank "<< vertical_rank <<" from universerank " << MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]<<std::endl;
|
||||||
|
std::cout << " QueueSend "<<buf<<"["<<bytes<<"]" << std::endl;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
// Block until FIFO has space
|
||||||
|
while( state->tail==next );
|
||||||
|
|
||||||
|
// Msync on weak order architectures
|
||||||
|
// Advance pointer
|
||||||
|
state->head = next;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Storage for the static world communicator; assigned in CartesianCommunicator::Init.
MPI_Comm CartesianCommunicator::communicator_world;
|
||||||
|
|
||||||
|
void CartesianCommunicator::Init(int *argc, char ***argv)
|
||||||
|
{
|
||||||
|
int flag;
|
||||||
|
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||||
|
if ( !flag ) {
|
||||||
|
MPI_Init(argc,argv);
|
||||||
|
}
|
||||||
|
communicator_world = MPI_COMM_WORLD;
|
||||||
|
MPI_Comm ShmComm;
|
||||||
|
MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
int rank;
|
||||||
|
int ierr=MPI_Cart_rank (communicator, &coor[0], &rank);
|
||||||
|
assert(ierr==0);
|
||||||
|
return rank;
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||||
|
{
|
||||||
|
coor.resize(_ndimension);
|
||||||
|
int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build a Cartesian communicator, periodic in every dimension, over
// communicator_world.
//   processors : extent of the processor grid in each dimension; the product
//                must equal the size of communicator_world.
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
  _ndimension = processors.size();
  _processors = processors;

  // Total number of ranks implied by the grid.
  _Nprocessors = 1;
  for(int d=0; d<_ndimension; d++){
    _Nprocessors *= _processors[d];
  }

  int world_size;
  MPI_Comm_size(communicator_world,&world_size);
  assert(world_size==_Nprocessors);

  // Every dimension wraps around.
  std::vector<int> wraps(_ndimension,1);

  _processor_coor.resize(_ndimension);
  MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&wraps[0],1,&communicator);
  MPI_Comm_rank  (communicator,&_processor);
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(uint64_t &u){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(float &f){
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSum(double &d)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFrom(void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
std::vector<CommsRequest_t> reqs(0);
|
||||||
|
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
|
||||||
|
SendToRecvFromComplete(reqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||||
|
void *recv,
|
||||||
|
int sender,
|
||||||
|
int receiver,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Status stat;
|
||||||
|
assert(sender != receiver);
|
||||||
|
int tag = sender;
|
||||||
|
if ( _processor == sender ) {
|
||||||
|
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
|
||||||
|
}
|
||||||
|
if ( _processor == receiver ) {
|
||||||
|
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Basic Halo comms primitive
|
||||||
|
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
MPI_Request xrq;
|
||||||
|
MPI_Request rrq;
|
||||||
|
int rank = _processor;
|
||||||
|
int ierr;
|
||||||
|
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||||
|
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||||
|
|
||||||
|
assert(ierr==0);
|
||||||
|
|
||||||
|
list.push_back(xrq);
|
||||||
|
list.push_back(rrq);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||||
|
void *xmit,
|
||||||
|
int dest,
|
||||||
|
void *recv,
|
||||||
|
int from,
|
||||||
|
int bytes)
|
||||||
|
{
|
||||||
|
uint64_t xmit_i = (uint64_t) xmit;
|
||||||
|
uint64_t recv_i = (uint64_t) recv;
|
||||||
|
uint64_t shm = (uint64_t) ShmCommBuf;
|
||||||
|
// assert xmit and recv lie in shared memory region
|
||||||
|
assert( (xmit_i >= shm) && (xmit_i+bytes <= shm+MAX_MPI_SHM_BYTES) );
|
||||||
|
assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) );
|
||||||
|
assert(from!=_processor);
|
||||||
|
assert(dest!=_processor);
|
||||||
|
MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
|
||||||
|
MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
MPIoffloadEngine::WaitAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Intentionally a no-op in this communicator implementation.
void CartesianCommunicator::StencilBarrier(void)
{
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||||
|
{
|
||||||
|
int nreq=list.size();
|
||||||
|
std::vector<MPI_Status> status(nreq);
|
||||||
|
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Barrier(void)
|
||||||
|
{
|
||||||
|
int ierr = MPI_Barrier(communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr=MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
|
{
|
||||||
|
int ierr= MPI_Bcast(data,
|
||||||
|
bytes,
|
||||||
|
MPI_BYTE,
|
||||||
|
root,
|
||||||
|
communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Address of this rank's shared-memory communication buffer.
void *CartesianCommunicator::ShmBufferSelf(void)
{
  return ShmCommBuf;
}
|
||||||
|
|
||||||
|
// This implementation exposes no peer buffers: always returns NULL.
void *CartesianCommunicator::ShmBuffer(int rank)
{
  return NULL;
}
|
||||||
|
// This implementation performs no pointer translation: always returns NULL.
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
{
  return NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
@ -28,12 +28,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void CartesianCommunicator::Init(int *argc, char *** arv)
|
void CartesianCommunicator::Init(int *argc, char *** arv)
|
||||||
{
|
{
|
||||||
|
ShmInitGeneric();
|
||||||
}
|
}
|
||||||
|
|
||||||
int Rank(void ){ return 0; };
|
|
||||||
|
|
||||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||||
{
|
{
|
||||||
_processors = processors;
|
_processors = processors;
|
||||||
@ -89,30 +92,17 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CartesianCommunicator::Barrier(void)
|
int CartesianCommunicator::RankWorld(void){return 0;}
|
||||||
{
|
void CartesianCommunicator::Barrier(void){}
|
||||||
}
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||||
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
|
||||||
{
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor ;}
|
||||||
}
|
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
{
|
{
|
||||||
source =0;
|
source =0;
|
||||||
dest=0;
|
dest=0;
|
||||||
}
|
}
|
||||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -39,14 +39,24 @@ namespace Grid {
|
|||||||
BACKTRACEFILE(); \
|
BACKTRACEFILE(); \
|
||||||
}\
|
}\
|
||||||
}
|
}
|
||||||
int Rank(void) {
|
|
||||||
return shmem_my_pe();
|
|
||||||
}
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Info that is set up once and is independent of the Cartesian layout
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
typedef struct HandShake_t {
|
typedef struct HandShake_t {
|
||||||
uint64_t seq_local;
|
uint64_t seq_local;
|
||||||
uint64_t seq_remote;
|
uint64_t seq_remote;
|
||||||
} HandShake;
|
} HandShake;
|
||||||
|
|
||||||
|
std::array<long,_SHMEM_REDUCE_SYNC_SIZE> make_psync_init(void) {
|
||||||
|
array<long,_SHMEM_REDUCE_SYNC_SIZE> ret;
|
||||||
|
ret.fill(SHMEM_SYNC_VALUE);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync_init = make_psync_init();
|
||||||
|
|
||||||
static Vector< HandShake > XConnections;
|
static Vector< HandShake > XConnections;
|
||||||
static Vector< HandShake > RConnections;
|
static Vector< HandShake > RConnections;
|
||||||
|
|
||||||
@ -61,7 +71,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
|
|||||||
RConnections[pe].seq_remote= 0;
|
RConnections[pe].seq_remote= 0;
|
||||||
}
|
}
|
||||||
shmem_barrier_all();
|
shmem_barrier_all();
|
||||||
|
ShmInitGeneric();
|
||||||
}
|
}
|
||||||
|
|
||||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||||
{
|
{
|
||||||
_ndimension = processors.size();
|
_ndimension = processors.size();
|
||||||
@ -89,7 +101,7 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){
|
|||||||
static long long source ;
|
static long long source ;
|
||||||
static long long dest ;
|
static long long dest ;
|
||||||
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
// int nreduce=1;
|
// int nreduce=1;
|
||||||
// int pestart=0;
|
// int pestart=0;
|
||||||
@ -105,7 +117,7 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
|
|||||||
static long long source ;
|
static long long source ;
|
||||||
static long long dest ;
|
static long long dest ;
|
||||||
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
// int nreduce=1;
|
// int nreduce=1;
|
||||||
// int pestart=0;
|
// int pestart=0;
|
||||||
@ -121,7 +133,7 @@ void CartesianCommunicator::GlobalSum(float &f){
|
|||||||
static float source ;
|
static float source ;
|
||||||
static float dest ;
|
static float dest ;
|
||||||
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
source = f;
|
source = f;
|
||||||
dest =0.0;
|
dest =0.0;
|
||||||
@ -133,7 +145,7 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N)
|
|||||||
static float source ;
|
static float source ;
|
||||||
static float dest = 0 ;
|
static float dest = 0 ;
|
||||||
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
if ( shmem_addr_accessible(f,_processor) ){
|
if ( shmem_addr_accessible(f,_processor) ){
|
||||||
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
|
shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync);
|
||||||
@ -152,7 +164,7 @@ void CartesianCommunicator::GlobalSum(double &d)
|
|||||||
static double source;
|
static double source;
|
||||||
static double dest ;
|
static double dest ;
|
||||||
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
source = d;
|
source = d;
|
||||||
dest = 0;
|
dest = 0;
|
||||||
@ -164,7 +176,8 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
|
|||||||
static double source ;
|
static double source ;
|
||||||
static double dest ;
|
static double dest ;
|
||||||
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
|
|
||||||
|
|
||||||
if ( shmem_addr_accessible(d,_processor) ){
|
if ( shmem_addr_accessible(d,_processor) ){
|
||||||
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
|
shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync);
|
||||||
@ -230,12 +243,9 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
|||||||
|
|
||||||
if ( _processor == sender ) {
|
if ( _processor == sender ) {
|
||||||
|
|
||||||
printf("Sender SHMEM pt2pt %d -> %d\n",sender,receiver);
|
|
||||||
// Check he has posted a receive
|
// Check he has posted a receive
|
||||||
while(SendSeq->seq_remote == SendSeq->seq_local);
|
while(SendSeq->seq_remote == SendSeq->seq_local);
|
||||||
|
|
||||||
printf("Sender receive %d posted\n",sender,receiver);
|
|
||||||
|
|
||||||
// Advance our send count
|
// Advance our send count
|
||||||
seq = ++(SendSeq->seq_local);
|
seq = ++(SendSeq->seq_local);
|
||||||
|
|
||||||
@ -244,26 +254,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
|||||||
shmem_putmem(recv,xmit,bytes,receiver);
|
shmem_putmem(recv,xmit,bytes,receiver);
|
||||||
shmem_fence();
|
shmem_fence();
|
||||||
|
|
||||||
printf("Sender sent payload %d\n",seq);
|
|
||||||
//Notify him we're done
|
//Notify him we're done
|
||||||
shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
|
shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
|
||||||
shmem_fence();
|
shmem_fence();
|
||||||
printf("Sender ringing door bell %d\n",seq);
|
|
||||||
}
|
}
|
||||||
if ( _processor == receiver ) {
|
if ( _processor == receiver ) {
|
||||||
|
|
||||||
printf("Receiver SHMEM pt2pt %d->%d\n",sender,receiver);
|
|
||||||
// Post a receive
|
// Post a receive
|
||||||
seq = ++(RecvSeq->seq_local);
|
seq = ++(RecvSeq->seq_local);
|
||||||
shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
|
shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
|
||||||
|
|
||||||
printf("Receiver Opening letter box %d\n",seq);
|
|
||||||
|
|
||||||
|
|
||||||
// Now wait until he has advanced our reception counter
|
// Now wait until he has advanced our reception counter
|
||||||
while(RecvSeq->seq_remote != RecvSeq->seq_local);
|
while(RecvSeq->seq_remote != RecvSeq->seq_local);
|
||||||
|
|
||||||
printf("Receiver Got the mail %d\n",seq);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -291,7 +294,7 @@ void CartesianCommunicator::Barrier(void)
|
|||||||
}
|
}
|
||||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||||
{
|
{
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
static uint32_t word;
|
static uint32_t word;
|
||||||
uint32_t *array = (uint32_t *) data;
|
uint32_t *array = (uint32_t *) data;
|
||||||
assert( (bytes % 4)==0);
|
assert( (bytes % 4)==0);
|
||||||
@ -314,7 +317,7 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
|||||||
}
|
}
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
{
|
{
|
||||||
static long psync[_SHMEM_REDUCE_SYNC_SIZE];
|
static std::array<long,_SHMEM_REDUCE_SYNC_SIZE> psync = psync_init;
|
||||||
static uint32_t word;
|
static uint32_t word;
|
||||||
uint32_t *array = (uint32_t *) data;
|
uint32_t *array = (uint32_t *) data;
|
||||||
assert( (bytes % 4)==0);
|
assert( (bytes % 4)==0);
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
@ -44,7 +45,7 @@ public:
|
|||||||
// Gather for when there is no need to SIMD split with compression
|
// Gather for when there is no need to SIMD split with compression
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
template<class vobj,class cobj,class compressor> void
|
template<class vobj,class cobj,class compressor> void
|
||||||
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
|
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
|
||||||
{
|
{
|
||||||
int rd = rhs._grid->_rdimensions[dimension];
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
|
|||||||
|
|
||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
|
|
||||||
int stride=rhs._grid->_slice_stride[dimension];
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
if ( cbmask == 0x3 ) {
|
if ( cbmask == 0x3 ) {
|
||||||
PARALLEL_NESTED_LOOP2
|
PARALLEL_NESTED_LOOP2
|
||||||
@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int bo=0;
|
int bo=0;
|
||||||
|
std::vector<std::pair<int,int> > table;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o = n*stride;
|
int o = n*stride;
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
|
||||||
if ( ocb &cbmask ) {
|
if ( ocb &cbmask ) {
|
||||||
buffer[off+bo++]=compress(rhs._odata[so+o+b]);
|
table.push_back(std::pair<int,int> (bo++,o+b));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int i=0;i<table.size();i++){
|
||||||
|
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,6 +114,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
int o = n*n1;
|
int o = n*n1;
|
||||||
int offset = b+n*n2;
|
int offset = b+n*n2;
|
||||||
cobj temp =compress(rhs._odata[so+o+b]);
|
cobj temp =compress(rhs._odata[so+o+b]);
|
||||||
|
|
||||||
extract<cobj>(temp,pointers,offset);
|
extract<cobj>(temp,pointers,offset);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -114,6 +122,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
} else {
|
} else {
|
||||||
|
|
||||||
assert(0); //Fixme think this is buggy
|
assert(0); //Fixme think this is buggy
|
||||||
|
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o=n*rhs._grid->_slice_stride[dimension];
|
int o=n*rhs._grid->_slice_stride[dimension];
|
||||||
@ -132,7 +141,7 @@ PARALLEL_NESTED_LOOP2
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Gather for when there is no need to SIMD split
|
// Gather for when there is no need to SIMD split
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||||
{
|
{
|
||||||
SimpleCompressor<vobj> dontcompress;
|
SimpleCompressor<vobj> dontcompress;
|
||||||
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
|
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
|
||||||
@ -150,7 +159,7 @@ template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vec
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Scatter for when there is no need to SIMD split
|
// Scatter for when there is no need to SIMD split
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
|
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
|
||||||
{
|
{
|
||||||
int rd = rhs._grid->_rdimensions[dimension];
|
int rd = rhs._grid->_rdimensions[dimension];
|
||||||
|
|
||||||
|
@ -119,8 +119,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
|
|||||||
assert(shift<fd);
|
assert(shift<fd);
|
||||||
|
|
||||||
int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
|
int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
|
||||||
std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
|
commVector<vobj> send_buf(buffer_size);
|
||||||
std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
|
commVector<vobj> recv_buf(buffer_size);
|
||||||
|
|
||||||
int cb= (cbmask==0x2)? Odd : Even;
|
int cb= (cbmask==0x2)? Odd : Even;
|
||||||
int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
|
||||||
@ -191,8 +191,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
|
|||||||
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
|
||||||
int words = sizeof(vobj)/sizeof(vector_type);
|
int words = sizeof(vobj)/sizeof(vector_type);
|
||||||
|
|
||||||
std::vector<Vector<scalar_object> > send_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
|
std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||||
std::vector<Vector<scalar_object> > recv_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
|
std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
|
||||||
|
|
||||||
int bytes = buffer_size*sizeof(scalar_object);
|
int bytes = buffer_size*sizeof(scalar_object);
|
||||||
|
|
||||||
|
@ -1,73 +1,74 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/lattice/Lattice_ET.h
|
Source file: ./lib/lattice/Lattice_ET.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_ET_H
|
#ifndef GRID_LATTICE_ET_H
|
||||||
#define GRID_LATTICE_ET_H
|
#define GRID_LATTICE_ET_H
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// Predicated where support
|
// Predicated where support
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
template<class iobj,class vobj,class robj>
|
template <class iobj, class vobj, class robj>
|
||||||
inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) {
|
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
|
||||||
|
const robj &iffalse) {
|
||||||
|
typename std::remove_const<vobj>::type ret;
|
||||||
|
|
||||||
typename std::remove_const<vobj>::type ret;
|
typedef typename vobj::scalar_object scalar_object;
|
||||||
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
|
typedef typename vobj::vector_type vector_type;
|
||||||
|
|
||||||
typedef typename vobj::scalar_object scalar_object;
|
const int Nsimd = vobj::vector_type::Nsimd();
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
const int words = sizeof(vobj) / sizeof(vector_type);
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
|
|
||||||
const int Nsimd = vobj::vector_type::Nsimd();
|
std::vector<Integer> mask(Nsimd);
|
||||||
const int words = sizeof(vobj)/sizeof(vector_type);
|
std::vector<scalar_object> truevals(Nsimd);
|
||||||
|
std::vector<scalar_object> falsevals(Nsimd);
|
||||||
|
|
||||||
std::vector<Integer> mask(Nsimd);
|
extract(iftrue, truevals);
|
||||||
std::vector<scalar_object> truevals (Nsimd);
|
extract(iffalse, falsevals);
|
||||||
std::vector<scalar_object> falsevals(Nsimd);
|
extract<vInteger, Integer>(TensorRemove(predicate), mask);
|
||||||
|
|
||||||
extract(iftrue ,truevals);
|
for (int s = 0; s < Nsimd; s++) {
|
||||||
extract(iffalse ,falsevals);
|
if (mask[s]) falsevals[s] = truevals[s];
|
||||||
extract<vInteger,Integer>(TensorRemove(predicate),mask);
|
|
||||||
|
|
||||||
for(int s=0;s<Nsimd;s++){
|
|
||||||
if (mask[s]) falsevals[s]=truevals[s];
|
|
||||||
}
|
|
||||||
|
|
||||||
merge(ret,falsevals);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
merge(ret, falsevals);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// recursive evaluation of expressions; Could
|
// recursive evaluation of expressions; Could
|
||||||
// switch to generic approach with variadics, a la
|
// switch to generic approach with variadics, a la
|
||||||
@ -75,303 +76,353 @@ namespace Grid {
|
|||||||
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
// from tuple is hideous; C++14 introduces std::make_index_sequence for this
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
|
// leaf eval of lattice ; should enable if protect using traits
|
||||||
|
|
||||||
//leaf eval of lattice ; should enable if protect using traits
|
template <typename T>
|
||||||
|
using is_lattice = std::is_base_of<LatticeBase, T>;
|
||||||
|
|
||||||
template <typename T> using is_lattice = std::is_base_of<LatticeBase,T >;
|
template <typename T>
|
||||||
|
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
|
||||||
|
|
||||||
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
|
||||||
|
|
||||||
|
//Specialization of getVectorType for lattices
|
||||||
|
template<typename T>
|
||||||
|
struct getVectorType<Lattice<T> >{
|
||||||
|
typedef typename Lattice<T>::vector_object type;
|
||||||
|
};
|
||||||
|
|
||||||
template<class sobj>
|
template<class sobj>
|
||||||
inline sobj eval(const unsigned int ss, const sobj &arg)
|
inline sobj eval(const unsigned int ss, const sobj &arg)
|
||||||
{
|
{
|
||||||
return arg;
|
return arg;
|
||||||
}
|
}
|
||||||
template<class lobj>
|
template <class lobj>
|
||||||
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg)
|
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) {
|
||||||
{
|
return arg._odata[ss];
|
||||||
return arg._odata[ss];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle nodes in syntax tree
|
// handle nodes in syntax tree
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
auto inline eval(const unsigned int ss, const LatticeUnaryExpression<Op,T1 > &expr) // eval one operand
|
auto inline eval(
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second))))
|
const unsigned int ss,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) // eval one operand
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)));
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
auto inline eval(const unsigned int ss, const LatticeBinaryExpression<Op,T1,T2> &expr) // eval two operands
|
auto inline eval(
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))))
|
const unsigned int ss,
|
||||||
{
|
const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)));
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
auto inline eval(const unsigned int ss, const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) // eval three operands
|
auto inline eval(const unsigned int ss,
|
||||||
-> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second))))
|
const LatticeTrinaryExpression<Op, T1, T2, T3>
|
||||||
{
|
&expr) // eval three operands
|
||||||
return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) );
|
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)),
|
||||||
|
eval(ss, std::get<2>(expr.second)))) {
|
||||||
|
return expr.first.func(eval(ss, std::get<0>(expr.second)),
|
||||||
|
eval(ss, std::get<1>(expr.second)),
|
||||||
|
eval(ss, std::get<2>(expr.second)));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion
|
// Obtain the grid from an expression, ensuring conformable. This must follow a
|
||||||
|
// tree recursion
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
template <class T1,
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf
|
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
{
|
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
|
||||||
if ( grid ) {
|
|
||||||
conformable(grid,lat._grid);
|
|
||||||
}
|
|
||||||
grid=lat._grid;
|
|
||||||
}
|
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
|
||||||
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
|
|
||||||
{
|
{
|
||||||
|
if (grid) {
|
||||||
|
conformable(grid, lat._grid);
|
||||||
|
}
|
||||||
|
grid = lat._grid;
|
||||||
}
|
}
|
||||||
|
template <class T1,
|
||||||
|
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void GridFromExpression(GridBase *&grid,
|
||||||
|
const T1 &notlat) // non-lattice leaf
|
||||||
|
{}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void GridFromExpression(GridBase *&grid,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
inline void GridFromExpression(
|
||||||
{
|
GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid, std::get<1>(expr.second));
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void GridFromExpression(
|
||||||
{
|
GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||||
GridFromExpression(grid,std::get<0>(expr.second));// recurse
|
GridFromExpression(grid, std::get<0>(expr.second)); // recurse
|
||||||
GridFromExpression(grid,std::get<1>(expr.second));
|
GridFromExpression(grid, std::get<1>(expr.second));
|
||||||
GridFromExpression(grid,std::get<2>(expr.second));
|
GridFromExpression(grid, std::get<2>(expr.second));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
|
// Obtain the CB from an expression, ensuring conformable. This must follow a
|
||||||
|
// tree recursion
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
|
template <class T1,
|
||||||
inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
|
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
|
||||||
{
|
{
|
||||||
if ( (cb==Odd) || (cb==Even) ) {
|
if ((cb == Odd) || (cb == Even)) {
|
||||||
assert(cb==lat.checkerboard);
|
assert(cb == lat.checkerboard);
|
||||||
}
|
}
|
||||||
cb=lat.checkerboard;
|
cb = lat.checkerboard;
|
||||||
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
|
template <class T1,
|
||||||
inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
|
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
|
||||||
|
inline void CBFromExpression(int &cb, const T1 &notlat) // non-lattice leaf
|
||||||
{
|
{
|
||||||
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
|
inline void CBFromExpression(int &cb,
|
||||||
{
|
const LatticeUnaryExpression<Op, T1> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename T1, typename T2>
|
template <typename Op, typename T1, typename T2>
|
||||||
inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)
|
inline void CBFromExpression(int &cb,
|
||||||
{
|
const LatticeBinaryExpression<Op, T1, T2> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb, std::get<1>(expr.second));
|
||||||
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1, typename T2, typename T3>
|
template <typename Op, typename T1, typename T2, typename T3>
|
||||||
inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
|
inline void CBFromExpression(
|
||||||
{
|
int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) {
|
||||||
CBFromExpression(cb,std::get<0>(expr.second));// recurse
|
CBFromExpression(cb, std::get<0>(expr.second)); // recurse
|
||||||
CBFromExpression(cb,std::get<1>(expr.second));
|
CBFromExpression(cb, std::get<1>(expr.second));
|
||||||
CBFromExpression(cb,std::get<2>(expr.second));
|
CBFromExpression(cb, std::get<2>(expr.second));
|
||||||
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Unary operators and funcs
|
// Unary operators and funcs
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#define GridUnopClass(name,ret)\
|
#define GridUnopClass(name, ret) \
|
||||||
template <class arg> struct name\
|
template <class arg> \
|
||||||
{\
|
struct name { \
|
||||||
static auto inline func(const arg a)-> decltype(ret) { return ret; } \
|
static auto inline func(const arg a) -> decltype(ret) { return ret; } \
|
||||||
};
|
};
|
||||||
|
|
||||||
GridUnopClass(UnarySub,-a);
|
GridUnopClass(UnarySub, -a);
|
||||||
GridUnopClass(UnaryNot,Not(a));
|
GridUnopClass(UnaryNot, Not(a));
|
||||||
GridUnopClass(UnaryAdj,adj(a));
|
GridUnopClass(UnaryAdj, adj(a));
|
||||||
GridUnopClass(UnaryConj,conjugate(a));
|
GridUnopClass(UnaryConj, conjugate(a));
|
||||||
GridUnopClass(UnaryTrace,trace(a));
|
GridUnopClass(UnaryTrace, trace(a));
|
||||||
GridUnopClass(UnaryTranspose,transpose(a));
|
GridUnopClass(UnaryTranspose, transpose(a));
|
||||||
GridUnopClass(UnaryTa,Ta(a));
|
GridUnopClass(UnaryTa, Ta(a));
|
||||||
GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
|
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a));
|
||||||
GridUnopClass(UnaryReal,real(a));
|
GridUnopClass(UnaryReal, real(a));
|
||||||
GridUnopClass(UnaryImag,imag(a));
|
GridUnopClass(UnaryImag, imag(a));
|
||||||
GridUnopClass(UnaryToReal,toReal(a));
|
GridUnopClass(UnaryToReal, toReal(a));
|
||||||
GridUnopClass(UnaryToComplex,toComplex(a));
|
GridUnopClass(UnaryToComplex, toComplex(a));
|
||||||
GridUnopClass(UnaryAbs,abs(a));
|
GridUnopClass(UnaryTimesI, timesI(a));
|
||||||
GridUnopClass(UnarySqrt,sqrt(a));
|
GridUnopClass(UnaryTimesMinusI, timesMinusI(a));
|
||||||
GridUnopClass(UnaryRsqrt,rsqrt(a));
|
GridUnopClass(UnaryAbs, abs(a));
|
||||||
GridUnopClass(UnarySin,sin(a));
|
GridUnopClass(UnarySqrt, sqrt(a));
|
||||||
GridUnopClass(UnaryCos,cos(a));
|
GridUnopClass(UnaryRsqrt, rsqrt(a));
|
||||||
GridUnopClass(UnaryLog,log(a));
|
GridUnopClass(UnarySin, sin(a));
|
||||||
GridUnopClass(UnaryExp,exp(a));
|
GridUnopClass(UnaryCos, cos(a));
|
||||||
|
GridUnopClass(UnaryAsin, asin(a));
|
||||||
|
GridUnopClass(UnaryAcos, acos(a));
|
||||||
|
GridUnopClass(UnaryLog, log(a));
|
||||||
|
GridUnopClass(UnaryExp, exp(a));
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Binary operators
|
// Binary operators
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#define GridBinOpClass(name,combination)\
|
#define GridBinOpClass(name, combination) \
|
||||||
template <class left,class right>\
|
template <class left, class right> \
|
||||||
struct name\
|
struct name { \
|
||||||
{\
|
static auto inline func(const left &lhs, const right &rhs) \
|
||||||
static auto inline func(const left &lhs,const right &rhs)-> decltype(combination) const \
|
-> decltype(combination) const { \
|
||||||
{\
|
return combination; \
|
||||||
return combination;\
|
} \
|
||||||
}\
|
}
|
||||||
}
|
GridBinOpClass(BinaryAdd, lhs + rhs);
|
||||||
GridBinOpClass(BinaryAdd,lhs+rhs);
|
GridBinOpClass(BinarySub, lhs - rhs);
|
||||||
GridBinOpClass(BinarySub,lhs-rhs);
|
GridBinOpClass(BinaryMul, lhs *rhs);
|
||||||
GridBinOpClass(BinaryMul,lhs*rhs);
|
GridBinOpClass(BinaryDiv, lhs /rhs);
|
||||||
|
|
||||||
GridBinOpClass(BinaryAnd ,lhs&rhs);
|
GridBinOpClass(BinaryAnd, lhs &rhs);
|
||||||
GridBinOpClass(BinaryOr ,lhs|rhs);
|
GridBinOpClass(BinaryOr, lhs | rhs);
|
||||||
GridBinOpClass(BinaryAndAnd,lhs&&rhs);
|
GridBinOpClass(BinaryAndAnd, lhs &&rhs);
|
||||||
GridBinOpClass(BinaryOrOr ,lhs||rhs);
|
GridBinOpClass(BinaryOrOr, lhs || rhs);
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// Trinary conditional op
|
// Trinary conditional op
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
#define GridTrinOpClass(name,combination)\
|
#define GridTrinOpClass(name, combination) \
|
||||||
template <class predicate,class left, class right> \
|
template <class predicate, class left, class right> \
|
||||||
struct name\
|
struct name { \
|
||||||
{\
|
static auto inline func(const predicate &pred, const left &lhs, \
|
||||||
static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \
|
const right &rhs) -> decltype(combination) const { \
|
||||||
{\
|
return combination; \
|
||||||
return combination;\
|
} \
|
||||||
}\
|
}
|
||||||
}
|
|
||||||
|
|
||||||
GridTrinOpClass(TrinaryWhere,(predicatedWhere<predicate, \
|
GridTrinOpClass(
|
||||||
typename std::remove_reference<left>::type, \
|
TrinaryWhere,
|
||||||
typename std::remove_reference<right>::type> (pred,lhs,rhs)));
|
(predicatedWhere<predicate, typename std::remove_reference<left>::type,
|
||||||
|
typename std::remove_reference<right>::type>(pred, lhs,
|
||||||
|
rhs)));
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Operator syntactical glue
|
// Operator syntactical glue
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
|
||||||
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
|
||||||
#define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
|
||||||
|
|
||||||
#define GRID_DEF_UNOP(op, name)\
|
#define GRID_UNOP(name) name<decltype(eval(0, arg))>
|
||||||
template <typename T1,\
|
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr> inline auto op(const T1 &arg) \
|
#define GRID_TRINOP(name) \
|
||||||
-> decltype(LatticeUnaryExpression<GRID_UNOP(name),const T1&>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \
|
name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
|
||||||
{ return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); }
|
|
||||||
|
|
||||||
#define GRID_BINOP_LEFT(op, name)\
|
#define GRID_DEF_UNOP(op, name) \
|
||||||
template <typename T1,typename T2,\
|
template <typename T1, \
|
||||||
typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr>\
|
typename std::enable_if<is_lattice<T1>::value || \
|
||||||
inline auto op(const T1 &lhs,const T2&rhs) \
|
is_lattice_expr<T1>::value, \
|
||||||
-> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
T1>::type * = nullptr> \
|
||||||
std::forward_as_tuple(lhs, rhs)))) \
|
inline auto op(const T1 &arg) \
|
||||||
{\
|
->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
|
||||||
std::forward_as_tuple(lhs, rhs))); \
|
return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
|
||||||
}
|
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_BINOP_RIGHT(op, name)\
|
#define GRID_BINOP_LEFT(op, name) \
|
||||||
template <typename T1,typename T2,\
|
template <typename T1, typename T2, \
|
||||||
typename std::enable_if<!is_lattice<T1>::value && !is_lattice_expr<T1>::value, T1>::type* = nullptr,\
|
typename std::enable_if<is_lattice<T1>::value || \
|
||||||
typename std::enable_if< is_lattice<T2>::value || is_lattice_expr<T2>::value, T2>::type* = nullptr> \
|
is_lattice_expr<T1>::value, \
|
||||||
inline auto op(const T1 &lhs,const T2&rhs) \
|
T1>::type * = nullptr> \
|
||||||
-> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||||
std::forward_as_tuple(lhs, rhs)))) \
|
->decltype( \
|
||||||
{\
|
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
|
std::make_pair(GRID_BINOP(name)(), \
|
||||||
std::forward_as_tuple(lhs, rhs))); \
|
std::forward_as_tuple(lhs, rhs)))) { \
|
||||||
}
|
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||||
|
}
|
||||||
|
|
||||||
#define GRID_DEF_BINOP(op, name)\
|
#define GRID_BINOP_RIGHT(op, name) \
|
||||||
GRID_BINOP_LEFT(op,name);\
|
template <typename T1, typename T2, \
|
||||||
GRID_BINOP_RIGHT(op,name);
|
typename std::enable_if<!is_lattice<T1>::value && \
|
||||||
|
!is_lattice_expr<T1>::value, \
|
||||||
|
T1>::type * = nullptr, \
|
||||||
|
typename std::enable_if<is_lattice<T2>::value || \
|
||||||
|
is_lattice_expr<T2>::value, \
|
||||||
|
T2>::type * = nullptr> \
|
||||||
|
inline auto op(const T1 &lhs, const T2 &rhs) \
|
||||||
|
->decltype( \
|
||||||
|
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), \
|
||||||
|
std::forward_as_tuple(lhs, rhs)))) { \
|
||||||
|
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
|
||||||
|
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define GRID_DEF_BINOP(op, name) \
|
||||||
|
GRID_BINOP_LEFT(op, name); \
|
||||||
|
GRID_BINOP_RIGHT(op, name);
|
||||||
|
|
||||||
#define GRID_DEF_TRINOP(op, name)\
|
#define GRID_DEF_TRINOP(op, name) \
|
||||||
template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \
|
template <typename T1, typename T2, typename T3> \
|
||||||
-> decltype(LatticeTrinaryExpression<GRID_TRINOP(name),const T1&,const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(),\
|
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \
|
||||||
std::forward_as_tuple(pred,lhs,rhs)))) \
|
->decltype( \
|
||||||
{\
|
LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||||
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(), \
|
const T3 &>(std::make_pair( \
|
||||||
std::forward_as_tuple(pred,lhs, rhs))); \
|
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \
|
||||||
}
|
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \
|
||||||
|
const T3 &>(std::make_pair( \
|
||||||
|
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
|
||||||
|
}
|
||||||
////////////////////////
|
////////////////////////
|
||||||
//Operator definitions
|
// Operator definitions
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
GRID_DEF_UNOP(operator -,UnarySub);
|
GRID_DEF_UNOP(operator-, UnarySub);
|
||||||
GRID_DEF_UNOP(Not,UnaryNot);
|
GRID_DEF_UNOP(Not, UnaryNot);
|
||||||
GRID_DEF_UNOP(operator !,UnaryNot);
|
GRID_DEF_UNOP(operator!, UnaryNot);
|
||||||
GRID_DEF_UNOP(adj,UnaryAdj);
|
GRID_DEF_UNOP(adj, UnaryAdj);
|
||||||
GRID_DEF_UNOP(conjugate,UnaryConj);
|
GRID_DEF_UNOP(conjugate, UnaryConj);
|
||||||
GRID_DEF_UNOP(trace,UnaryTrace);
|
GRID_DEF_UNOP(trace, UnaryTrace);
|
||||||
GRID_DEF_UNOP(transpose,UnaryTranspose);
|
GRID_DEF_UNOP(transpose, UnaryTranspose);
|
||||||
GRID_DEF_UNOP(Ta,UnaryTa);
|
GRID_DEF_UNOP(Ta, UnaryTa);
|
||||||
GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
|
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup);
|
||||||
GRID_DEF_UNOP(real,UnaryReal);
|
GRID_DEF_UNOP(real, UnaryReal);
|
||||||
GRID_DEF_UNOP(imag,UnaryImag);
|
GRID_DEF_UNOP(imag, UnaryImag);
|
||||||
GRID_DEF_UNOP(toReal,UnaryToReal);
|
GRID_DEF_UNOP(toReal, UnaryToReal);
|
||||||
GRID_DEF_UNOP(toComplex,UnaryToComplex);
|
GRID_DEF_UNOP(toComplex, UnaryToComplex);
|
||||||
GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
|
GRID_DEF_UNOP(timesI, UnaryTimesI);
|
||||||
GRID_DEF_UNOP(sqrt ,UnarySqrt);
|
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI);
|
||||||
GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
|
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the
|
||||||
GRID_DEF_UNOP(sin ,UnarySin);
|
// abs-fabs-dabs-labs thing
|
||||||
GRID_DEF_UNOP(cos ,UnaryCos);
|
GRID_DEF_UNOP(sqrt, UnarySqrt);
|
||||||
GRID_DEF_UNOP(log ,UnaryLog);
|
GRID_DEF_UNOP(rsqrt, UnaryRsqrt);
|
||||||
GRID_DEF_UNOP(exp ,UnaryExp);
|
GRID_DEF_UNOP(sin, UnarySin);
|
||||||
|
GRID_DEF_UNOP(cos, UnaryCos);
|
||||||
|
GRID_DEF_UNOP(asin, UnaryAsin);
|
||||||
|
GRID_DEF_UNOP(acos, UnaryAcos);
|
||||||
|
GRID_DEF_UNOP(log, UnaryLog);
|
||||||
|
GRID_DEF_UNOP(exp, UnaryExp);
|
||||||
|
|
||||||
GRID_DEF_BINOP(operator+,BinaryAdd);
|
GRID_DEF_BINOP(operator+, BinaryAdd);
|
||||||
GRID_DEF_BINOP(operator-,BinarySub);
|
GRID_DEF_BINOP(operator-, BinarySub);
|
||||||
GRID_DEF_BINOP(operator*,BinaryMul);
|
GRID_DEF_BINOP(operator*, BinaryMul);
|
||||||
|
GRID_DEF_BINOP(operator/, BinaryDiv);
|
||||||
|
|
||||||
GRID_DEF_BINOP(operator&,BinaryAnd);
|
GRID_DEF_BINOP(operator&, BinaryAnd);
|
||||||
GRID_DEF_BINOP(operator|,BinaryOr);
|
GRID_DEF_BINOP(operator|, BinaryOr);
|
||||||
GRID_DEF_BINOP(operator&&,BinaryAndAnd);
|
GRID_DEF_BINOP(operator&&, BinaryAndAnd);
|
||||||
GRID_DEF_BINOP(operator||,BinaryOrOr);
|
GRID_DEF_BINOP(operator||, BinaryOrOr);
|
||||||
|
|
||||||
GRID_DEF_TRINOP(where,TrinaryWhere);
|
GRID_DEF_TRINOP(where, TrinaryWhere);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
// Closure convenience to force expression to evaluate
|
// Closure convenience to force expression to evaluate
|
||||||
/////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////
|
||||||
template<class Op,class T1>
|
template <class Op, class T1>
|
||||||
auto closure(const LatticeUnaryExpression<Op,T1> & expr)
|
auto closure(const LatticeUnaryExpression<Op, T1> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))>
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret(
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> ret(expr);
|
expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class Op,class T1, class T2>
|
template <class Op, class T1, class T2>
|
||||||
auto closure(const LatticeBinaryExpression<Op,T1,T2> & expr)
|
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second))))>
|
eval(0, std::get<1>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
eval(0, std::get<1>(expr.second))))>
|
||||||
eval(0,std::get<1>(expr.second))))> ret(expr);
|
ret(expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
template<class Op,class T1, class T2, class T3>
|
template <class Op, class T1, class T2, class T3>
|
||||||
auto closure(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
|
||||||
-> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second)),
|
eval(0, std::get<1>(expr.second)),
|
||||||
eval(0,std::get<2>(expr.second))))>
|
eval(0, std::get<2>(expr.second))))> {
|
||||||
{
|
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)),
|
||||||
Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
eval(0, std::get<1>(expr.second)),
|
||||||
eval(0,std::get<1>(expr.second)),
|
eval(0, std::get<2>(expr.second))))>
|
||||||
eval(0,std::get<2>(expr.second))))> ret(expr);
|
ret(expr);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -382,12 +433,11 @@ template<class Op,class T1, class T2, class T3>
|
|||||||
#undef GRID_DEF_UNOP
|
#undef GRID_DEF_UNOP
|
||||||
#undef GRID_DEF_BINOP
|
#undef GRID_DEF_BINOP
|
||||||
#undef GRID_DEF_TRINOP
|
#undef GRID_DEF_TRINOP
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
int main(int argc,char **argv){
|
int main(int argc,char **argv){
|
||||||
|
|
||||||
Lattice<double> v1(16);
|
Lattice<double> v1(16);
|
||||||
@ -397,7 +447,7 @@ using namespace Grid;
|
|||||||
BinaryAdd<double,double> tmp;
|
BinaryAdd<double,double> tmp;
|
||||||
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
|
||||||
expr(std::make_pair(tmp,
|
expr(std::make_pair(tmp,
|
||||||
std::forward_as_tuple(v1,v2)));
|
std::forward_as_tuple(v1,v2)));
|
||||||
tmp.func(eval(0,v1),eval(0,v2));
|
tmp.func(eval(0,v1),eval(0,v2));
|
||||||
|
|
||||||
auto var = v1+v2;
|
auto var = v1+v2;
|
||||||
|
@ -1,32 +1,33 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/lattice/Lattice_base.h
|
Source file: ./lib/lattice/Lattice_base.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef GRID_LATTICE_BASE_H
|
#ifndef GRID_LATTICE_BASE_H
|
||||||
#define GRID_LATTICE_BASE_H
|
#define GRID_LATTICE_BASE_H
|
||||||
|
|
||||||
@ -64,9 +65,6 @@ public:
|
|||||||
|
|
||||||
class LatticeExpressionBase {};
|
class LatticeExpressionBase {};
|
||||||
|
|
||||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >; // Aligned allocator??
|
|
||||||
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
|
|
||||||
|
|
||||||
template <typename Op, typename T1>
|
template <typename Op, typename T1>
|
||||||
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
|
||||||
public:
|
public:
|
||||||
@ -101,6 +99,7 @@ public:
|
|||||||
int begin(void) { return 0;};
|
int begin(void) { return 0;};
|
||||||
int end(void) { return _odata.size(); }
|
int end(void) { return _odata.size(); }
|
||||||
vobj & operator[](int i) { return _odata[i]; };
|
vobj & operator[](int i) { return _odata[i]; };
|
||||||
|
const vobj & operator[](int i) const { return _odata[i]; };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
@ -255,6 +254,18 @@ PARALLEL_FOR_LOOP
|
|||||||
checkerboard=0;
|
checkerboard=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Lattice(const Lattice& r){ // copy constructor
|
||||||
|
_grid = r._grid;
|
||||||
|
checkerboard = r.checkerboard;
|
||||||
|
_odata.resize(_grid->oSites());// essential
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
_odata[ss]=r._odata[ss];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
virtual ~Lattice(void) = default;
|
virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
@ -267,7 +278,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
this->checkerboard = r.checkerboard;
|
this->checkerboard = r.checkerboard;
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
this->_odata[ss]=r._odata[ss];
|
this->_odata[ss]=r._odata[ss];
|
||||||
@ -289,17 +300,6 @@ PARALLEL_FOR_LOOP
|
|||||||
*this = (*this)+r;
|
*this = (*this)+r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
strong_inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
|
||||||
conformable(lhs,rhs);
|
|
||||||
Lattice<vobj> ret(lhs._grid);
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int ss=0;ss<lhs._grid->oSites();ss++){
|
|
||||||
ret._odata[ss] = lhs._odata[ss]*pow(rhs._odata[ss],-1.0);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
};
|
|
||||||
|
|
||||||
}; // class Lattice
|
}; // class Lattice
|
||||||
|
|
||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
@ -324,27 +324,27 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <lattice/Lattice_conformable.h>
|
#include "Lattice_conformable.h"
|
||||||
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
#define GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
|
||||||
#include <lattice/Lattice_ET.h>
|
#include "Lattice_ET.h"
|
||||||
#else
|
#else
|
||||||
#include <lattice/Lattice_overload.h>
|
#include "Lattice_overload.h"
|
||||||
#endif
|
#endif
|
||||||
#include <lattice/Lattice_arith.h>
|
#include "Lattice_arith.h"
|
||||||
#include <lattice/Lattice_trace.h>
|
#include "Lattice_trace.h"
|
||||||
#include <lattice/Lattice_transpose.h>
|
#include "Lattice_transpose.h"
|
||||||
#include <lattice/Lattice_local.h>
|
#include "Lattice_local.h"
|
||||||
#include <lattice/Lattice_reduction.h>
|
#include "Lattice_reduction.h"
|
||||||
#include <lattice/Lattice_peekpoke.h>
|
#include "Lattice_peekpoke.h"
|
||||||
#include <lattice/Lattice_reality.h>
|
#include "Lattice_reality.h"
|
||||||
#include <lattice/Lattice_comparison_utils.h>
|
#include "Lattice_comparison_utils.h"
|
||||||
#include <lattice/Lattice_comparison.h>
|
#include "Lattice_comparison.h"
|
||||||
#include <lattice/Lattice_coordinate.h>
|
#include "Lattice_coordinate.h"
|
||||||
#include <lattice/Lattice_where.h>
|
#include "Lattice_where.h"
|
||||||
#include <lattice/Lattice_rng.h>
|
#include "Lattice_rng.h"
|
||||||
#include <lattice/Lattice_unary.h>
|
#include "Lattice_unary.h"
|
||||||
#include <lattice/Lattice_transfer.h>
|
#include "Lattice_transfer.h"
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -154,7 +154,7 @@ PARALLEL_FOR_LOOP
|
|||||||
template<class vobj,class sobj>
|
template<class vobj,class sobj>
|
||||||
void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
|
void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
|
||||||
|
|
||||||
GridBase *grid=l._grid;
|
GridBase *grid = l._grid;
|
||||||
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
typedef typename vobj::scalar_type scalar_type;
|
||||||
typedef typename vobj::vector_type vector_type;
|
typedef typename vobj::vector_type vector_type;
|
||||||
@ -164,16 +164,18 @@ PARALLEL_FOR_LOOP
|
|||||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||||
|
|
||||||
|
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
int odx,idx;
|
int odx,idx;
|
||||||
idx= grid->iIndex(site);
|
idx= grid->iIndex(site);
|
||||||
odx= grid->oIndex(site);
|
odx= grid->oIndex(site);
|
||||||
|
|
||||||
std::vector<sobj> buf(Nsimd);
|
scalar_type * vp = (scalar_type *)&l._odata[odx];
|
||||||
|
scalar_type * pt = (scalar_type *)&s;
|
||||||
extract(l._odata[odx],buf);
|
|
||||||
|
for(int w=0;w<words;w++){
|
||||||
|
pt[w] = vp[idx+w*Nsimd];
|
||||||
|
}
|
||||||
|
|
||||||
s = buf[idx];
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -190,18 +192,17 @@ PARALLEL_FOR_LOOP
|
|||||||
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
assert( l.checkerboard== l._grid->CheckerBoard(site));
|
||||||
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
assert( sizeof(sobj)*Nsimd == sizeof(vobj));
|
||||||
|
|
||||||
|
static const int words=sizeof(vobj)/sizeof(vector_type);
|
||||||
int odx,idx;
|
int odx,idx;
|
||||||
idx= grid->iIndex(site);
|
idx= grid->iIndex(site);
|
||||||
odx= grid->oIndex(site);
|
odx= grid->oIndex(site);
|
||||||
|
|
||||||
std::vector<sobj> buf(Nsimd);
|
scalar_type * vp = (scalar_type *)&l._odata[odx];
|
||||||
|
scalar_type * pt = (scalar_type *)&s;
|
||||||
// extract-modify-merge cycle is easiest way and this is not perf critical
|
|
||||||
extract(l._odata[odx],buf);
|
|
||||||
|
|
||||||
buf[idx] = s;
|
for(int w=0;w<words;w++){
|
||||||
|
vp[idx+w*Nsimd] = pt[w];
|
||||||
merge(l._odata[odx],buf);
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
|
@ -40,7 +40,7 @@ namespace Grid {
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||||
ComplexD nrm = innerProduct(arg,arg);
|
ComplexD nrm = innerProduct(arg,arg);
|
||||||
return real(nrm);
|
return std::real(nrm);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
|
@ -294,11 +294,12 @@ namespace Grid {
|
|||||||
int rank,o_idx,i_idx;
|
int rank,o_idx,i_idx;
|
||||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||||
|
|
||||||
int l_idx=generator_idx(o_idx,i_idx);
|
int l_idx=generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
std::vector<int> site_seeds(4);
|
const int num_rand_seed=16;
|
||||||
for(int i=0;i<4;i++){
|
std::vector<int> site_seeds(num_rand_seed);
|
||||||
|
for(int i=0;i<site_seeds.size();i++){
|
||||||
site_seeds[i]= ui(pseeder);
|
site_seeds[i]= ui(pseeder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
|
|||||||
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<ig->lSites();idx++){
|
for(int idx=0;idx<ig->lSites();idx++){
|
||||||
std::vector<int> lcoor(ni);
|
std::vector<int> lcoor(ni);
|
||||||
ig->LocalIndexToLocalCoor(idx,lcoor);
|
ig->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
@ -386,7 +386,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<lg->lSites();idx++){
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
std::vector<int> lcoor(nl);
|
std::vector<int> lcoor(nl);
|
||||||
std::vector<int> hcoor(nh);
|
std::vector<int> hcoor(nh);
|
||||||
@ -420,15 +420,15 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
|
|||||||
assert(hg->_processors[orthog]==1);
|
assert(hg->_processors[orthog]==1);
|
||||||
|
|
||||||
int dl; dl = 0;
|
int dl; dl = 0;
|
||||||
for(int d=0;d<nh;d++){
|
for(int d=0;d<nh;d++){
|
||||||
if ( d != orthog) {
|
if ( d != orthog) {
|
||||||
assert(lg->_processors[dl] == hg->_processors[d]);
|
assert(lg->_processors[dl] == hg->_processors[d]);
|
||||||
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
|
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
|
||||||
dl++;
|
dl++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the above should guarantee that the operations are local
|
// the above should guarantee that the operations are local
|
||||||
PARALLEL_FOR_LOOP
|
//PARALLEL_FOR_LOOP
|
||||||
for(int idx=0;idx<lg->lSites();idx++){
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
std::vector<int> lcoor(nl);
|
std::vector<int> lcoor(nl);
|
||||||
std::vector<int> hcoor(nh);
|
std::vector<int> hcoor(nh);
|
||||||
@ -446,6 +446,79 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
GridBase *lg = lowDim._grid;
|
||||||
|
GridBase *hg = higherDim._grid;
|
||||||
|
int nl = lg->_ndimension;
|
||||||
|
int nh = hg->_ndimension;
|
||||||
|
|
||||||
|
assert(nl == nh);
|
||||||
|
assert(orthog<nh);
|
||||||
|
assert(orthog>=0);
|
||||||
|
|
||||||
|
for(int d=0;d<nh;d++){
|
||||||
|
assert(lg->_processors[d] == hg->_processors[d]);
|
||||||
|
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
//PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(nl);
|
||||||
|
std::vector<int> hcoor(nh);
|
||||||
|
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
|
hcoor=lcoor;
|
||||||
|
hcoor[orthog] = slice_hi;
|
||||||
|
peekLocalSite(s,lowDim,lcoor);
|
||||||
|
pokeLocalSite(s,higherDim,hcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class vobj>
|
||||||
|
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
|
||||||
|
{
|
||||||
|
typedef typename vobj::scalar_object sobj;
|
||||||
|
sobj s;
|
||||||
|
|
||||||
|
GridBase *lg = lowDim._grid;
|
||||||
|
GridBase *hg = higherDim._grid;
|
||||||
|
int nl = lg->_ndimension;
|
||||||
|
int nh = hg->_ndimension;
|
||||||
|
|
||||||
|
assert(nl == nh);
|
||||||
|
assert(orthog<nh);
|
||||||
|
assert(orthog>=0);
|
||||||
|
|
||||||
|
for(int d=0;d<nh;d++){
|
||||||
|
assert(lg->_processors[d] == hg->_processors[d]);
|
||||||
|
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// the above should guarantee that the operations are local
|
||||||
|
//PARALLEL_FOR_LOOP
|
||||||
|
for(int idx=0;idx<lg->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(nl);
|
||||||
|
std::vector<int> hcoor(nh);
|
||||||
|
lg->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
if( lcoor[orthog] == slice_lo ) {
|
||||||
|
hcoor=lcoor;
|
||||||
|
hcoor[orthog] = slice_hi;
|
||||||
|
peekLocalSite(s,higherDim,hcoor);
|
||||||
|
pokeLocalSite(s,lowDim,lcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
||||||
{
|
{
|
||||||
@ -482,6 +555,96 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||||
|
template<typename vobj, typename sobj>
|
||||||
|
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){
|
||||||
|
typedef typename vobj::vector_type vtype;
|
||||||
|
|
||||||
|
GridBase* in_grid = in._grid;
|
||||||
|
out.resize(in_grid->lSites());
|
||||||
|
|
||||||
|
int ndim = in_grid->Nd();
|
||||||
|
int in_nsimd = vtype::Nsimd();
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > in_icoor(in_nsimd);
|
||||||
|
|
||||||
|
for(int lane=0; lane < in_nsimd; lane++){
|
||||||
|
in_icoor[lane].resize(ndim);
|
||||||
|
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||||
|
}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||||
|
//Assemble vector of pointers to output elements
|
||||||
|
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||||
|
|
||||||
|
std::vector<int> in_ocoor(ndim);
|
||||||
|
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||||
|
|
||||||
|
std::vector<int> lcoor(in_grid->Nd());
|
||||||
|
|
||||||
|
for(int lane=0; lane < in_nsimd; lane++){
|
||||||
|
for(int mu=0;mu<ndim;mu++)
|
||||||
|
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||||
|
|
||||||
|
int lex;
|
||||||
|
Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions);
|
||||||
|
out_ptrs[lane] = &out[lex];
|
||||||
|
}
|
||||||
|
|
||||||
|
//Unpack into those ptrs
|
||||||
|
const vobj & in_vobj = in._odata[in_oidx];
|
||||||
|
extract1(in_vobj, out_ptrs, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Convert a Lattice from one precision to another
|
||||||
|
template<class VobjOut, class VobjIn>
|
||||||
|
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||||
|
assert(out._grid->Nd() == in._grid->Nd());
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
GridBase *in_grid=in._grid;
|
||||||
|
GridBase *out_grid = out._grid;
|
||||||
|
|
||||||
|
typedef typename VobjOut::scalar_object SobjOut;
|
||||||
|
typedef typename VobjIn::scalar_object SobjIn;
|
||||||
|
|
||||||
|
int ndim = out._grid->Nd();
|
||||||
|
int out_nsimd = out_grid->Nsimd();
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > out_icoor(out_nsimd);
|
||||||
|
|
||||||
|
for(int lane=0; lane < out_nsimd; lane++){
|
||||||
|
out_icoor[lane].resize(ndim);
|
||||||
|
out_grid->iCoorFromIindex(out_icoor[lane], lane);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<SobjOut> in_slex_conv(in_grid->lSites());
|
||||||
|
unvectorizeToLexOrdArray(in_slex_conv, in);
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
|
||||||
|
std::vector<int> out_ocoor(ndim);
|
||||||
|
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
|
||||||
|
|
||||||
|
std::vector<SobjOut*> ptrs(out_nsimd);
|
||||||
|
|
||||||
|
std::vector<int> lcoor(out_grid->Nd());
|
||||||
|
|
||||||
|
for(int lane=0; lane < out_nsimd; lane++){
|
||||||
|
for(int mu=0;mu<ndim;mu++)
|
||||||
|
lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu];
|
||||||
|
|
||||||
|
int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
|
||||||
|
ptrs[lane] = &in_slex_conv[llex];
|
||||||
|
}
|
||||||
|
merge(out._odata[out_oidx], ptrs, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -194,22 +194,22 @@ class BinaryIO {
|
|||||||
|
|
||||||
std::vector<int> site({x,y,z,t});
|
std::vector<int> site({x,y,z,t});
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if (grid->IsBoss()) {
|
||||||
fin.read((char *)&file_object,sizeof(file_object));
|
fin.read((char *)&file_object, sizeof(file_object));
|
||||||
bytes += sizeof(file_object);
|
bytes += sizeof(file_object);
|
||||||
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
|
||||||
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
|
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
|
||||||
|
|
||||||
munge(file_object,munged,csum);
|
munge(file_object, munged, csum);
|
||||||
}
|
}
|
||||||
// The boss who read the file has their value poked
|
// The boss who read the file has their value poked
|
||||||
pokeSite(munged,Umu,site);
|
pokeSite(munged,Umu,site);
|
||||||
}}}}
|
}}}}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -254,20 +254,20 @@ class BinaryIO {
|
|||||||
|
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
|
|
||||||
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
|
||||||
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
|
||||||
|
|
||||||
// NB could gather an xstrip as an optimisation.
|
// NB could gather an xstrip as an optimisation.
|
||||||
fout.write((char *)&file_object,sizeof(file_object));
|
fout.write((char *)&file_object,sizeof(file_object));
|
||||||
bytes+=sizeof(file_object);
|
bytes+=sizeof(file_object);
|
||||||
}
|
}
|
||||||
}}}}
|
}}}}
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -305,15 +305,15 @@ class BinaryIO {
|
|||||||
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
if( rank == grid->ThisRank() ){
|
if( rank == grid->ThisRank() ){
|
||||||
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
|
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
|
||||||
parallel.GetState(saved,l_idx);
|
parallel.GetState(saved,l_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Broadcast(rank,(void *)&saved[0],bytes);
|
grid->Broadcast(rank,(void *)&saved[0],bytes);
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
||||||
fout.write((char *)&saved[0],bytes);
|
fout.write((char *)&saved[0],bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -355,14 +355,14 @@ class BinaryIO {
|
|||||||
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
int l_idx=parallel.generator_idx(o_idx,i_idx);
|
||||||
|
|
||||||
if ( grid->IsBoss() ) {
|
if ( grid->IsBoss() ) {
|
||||||
fin.read((char *)&saved[0],bytes);
|
fin.read((char *)&saved[0],bytes);
|
||||||
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Broadcast(0,(void *)&saved[0],bytes);
|
grid->Broadcast(0,(void *)&saved[0],bytes);
|
||||||
|
|
||||||
if( rank == grid->ThisRank() ){
|
if( rank == grid->ThisRank() ){
|
||||||
parallel.SetState(saved,l_idx);
|
parallel.SetState(saved,l_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -415,15 +415,15 @@ class BinaryIO {
|
|||||||
|
|
||||||
if ( d == 0 ) parallel[d] = 0;
|
if ( d == 0 ) parallel[d] = 0;
|
||||||
if (parallel[d]) {
|
if (parallel[d]) {
|
||||||
range[d] = grid->_ldimensions[d];
|
range[d] = grid->_ldimensions[d];
|
||||||
start[d] = grid->_processor_coor[d]*range[d];
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
ioproc[d]= grid->_processor_coor[d];
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
} else {
|
} else {
|
||||||
range[d] = grid->_gdimensions[d];
|
range[d] = grid->_gdimensions[d];
|
||||||
start[d] = 0;
|
start[d] = 0;
|
||||||
ioproc[d]= 0;
|
ioproc[d]= 0;
|
||||||
|
|
||||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
}
|
}
|
||||||
slice_vol = slice_vol * range[d];
|
slice_vol = slice_vol * range[d];
|
||||||
}
|
}
|
||||||
@ -434,9 +434,9 @@ class BinaryIO {
|
|||||||
std::cout<< std::dec ;
|
std::cout<< std::dec ;
|
||||||
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
for(int d=0;d<grid->_ndimension;d++){
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
std::cout<< range[d];
|
std::cout<< range[d];
|
||||||
if( d< grid->_ndimension-1 )
|
if( d< grid->_ndimension-1 )
|
||||||
std::cout<< " x ";
|
std::cout<< " x ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
@ -457,13 +457,13 @@ class BinaryIO {
|
|||||||
// available (how short sighted is that?)
|
// available (how short sighted is that?)
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
Umu = zero;
|
Umu = zero;
|
||||||
static uint32_t csum=0;
|
static uint32_t csum; csum=0;
|
||||||
fobj fileObj;
|
fobj fileObj;
|
||||||
static sobj siteObj; // Static to place in symmetric region for SHMEM
|
static sobj siteObj; // Static to place in symmetric region for SHMEM
|
||||||
|
|
||||||
// need to implement these loops in Nd independent way with a lexico conversion
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
std::vector<int> tsite(nd); // temporary mixed up site
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
std::vector<int> gsite(nd);
|
std::vector<int> gsite(nd);
|
||||||
std::vector<int> lsite(nd);
|
std::vector<int> lsite(nd);
|
||||||
@ -472,8 +472,8 @@ class BinaryIO {
|
|||||||
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
gsite[d] = tsite[d]+start[d]; // global site
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
@ -487,29 +487,29 @@ class BinaryIO {
|
|||||||
// iorank reads from the seek
|
// iorank reads from the seek
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (myrank == iorank) {
|
if (myrank == iorank) {
|
||||||
|
|
||||||
fin.seekg(offset+g_idx*sizeof(fileObj));
|
fin.seekg(offset+g_idx*sizeof(fileObj));
|
||||||
fin.read((char *)&fileObj,sizeof(fileObj));
|
fin.read((char *)&fileObj,sizeof(fileObj));
|
||||||
bytes+=sizeof(fileObj);
|
bytes+=sizeof(fileObj);
|
||||||
|
|
||||||
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
munge(fileObj,siteObj,csum);
|
munge(fileObj,siteObj,csum);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Possibly do transport through pt2pt
|
// Possibly do transport through pt2pt
|
||||||
if ( rank != iorank ) {
|
if ( rank != iorank ) {
|
||||||
if ( (myrank == rank) || (myrank==iorank) ) {
|
if ( (myrank == rank) || (myrank==iorank) ) {
|
||||||
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
|
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Poke at destination
|
// Poke at destination
|
||||||
if ( myrank == rank ) {
|
if ( myrank == rank ) {
|
||||||
pokeLocalSite(siteObj,Umu,lsite);
|
pokeLocalSite(siteObj,Umu,lsite);
|
||||||
}
|
}
|
||||||
grid->Barrier(); // necessary?
|
grid->Barrier(); // necessary?
|
||||||
}
|
}
|
||||||
@ -520,7 +520,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
@ -558,15 +558,15 @@ class BinaryIO {
|
|||||||
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
|
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
|
||||||
|
|
||||||
if (parallel[d]) {
|
if (parallel[d]) {
|
||||||
range[d] = grid->_ldimensions[d];
|
range[d] = grid->_ldimensions[d];
|
||||||
start[d] = grid->_processor_coor[d]*range[d];
|
start[d] = grid->_processor_coor[d]*range[d];
|
||||||
ioproc[d]= grid->_processor_coor[d];
|
ioproc[d]= grid->_processor_coor[d];
|
||||||
} else {
|
} else {
|
||||||
range[d] = grid->_gdimensions[d];
|
range[d] = grid->_gdimensions[d];
|
||||||
start[d] = 0;
|
start[d] = 0;
|
||||||
ioproc[d]= 0;
|
ioproc[d]= 0;
|
||||||
|
|
||||||
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
slice_vol = slice_vol * range[d];
|
slice_vol = slice_vol * range[d];
|
||||||
@ -577,9 +577,9 @@ class BinaryIO {
|
|||||||
grid->GlobalSum(tmp);
|
grid->GlobalSum(tmp);
|
||||||
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
|
||||||
for(int d=0;d<grid->_ndimension;d++){
|
for(int d=0;d<grid->_ndimension;d++){
|
||||||
std::cout<< range[d];
|
std::cout<< range[d];
|
||||||
if( d< grid->_ndimension-1 )
|
if( d< grid->_ndimension-1 )
|
||||||
std::cout<< " x ";
|
std::cout<< " x ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
@ -610,7 +610,7 @@ class BinaryIO {
|
|||||||
// should aggregate a whole chunk and then write.
|
// should aggregate a whole chunk and then write.
|
||||||
// need to implement these loops in Nd independent way with a lexico conversion
|
// need to implement these loops in Nd independent way with a lexico conversion
|
||||||
for(int tlex=0;tlex<slice_vol;tlex++){
|
for(int tlex=0;tlex<slice_vol;tlex++){
|
||||||
|
|
||||||
std::vector<int> tsite(nd); // temporary mixed up site
|
std::vector<int> tsite(nd); // temporary mixed up site
|
||||||
std::vector<int> gsite(nd);
|
std::vector<int> gsite(nd);
|
||||||
std::vector<int> lsite(nd);
|
std::vector<int> lsite(nd);
|
||||||
@ -619,8 +619,8 @@ class BinaryIO {
|
|||||||
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
Lexicographic::CoorFromIndex(tsite,tlex,range);
|
||||||
|
|
||||||
for(int d=0;d<nd;d++){
|
for(int d=0;d<nd;d++){
|
||||||
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
|
||||||
gsite[d] = tsite[d]+start[d]; // global site
|
gsite[d] = tsite[d]+start[d]; // global site
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -640,26 +640,26 @@ class BinaryIO {
|
|||||||
|
|
||||||
// Pair of nodes may need to do pt2pt send
|
// Pair of nodes may need to do pt2pt send
|
||||||
if ( rank != iorank ) { // comms is necessary
|
if ( rank != iorank ) { // comms is necessary
|
||||||
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
|
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
|
||||||
// Send to IOrank
|
// Send to IOrank
|
||||||
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
|
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->Barrier(); // necessary?
|
grid->Barrier(); // necessary?
|
||||||
|
|
||||||
if (myrank == iorank) {
|
if (myrank == iorank) {
|
||||||
|
|
||||||
munge(siteObj,fileObj,csum);
|
munge(siteObj,fileObj,csum);
|
||||||
|
|
||||||
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
|
||||||
|
|
||||||
fout.seekp(offset+g_idx*sizeof(fileObj));
|
fout.seekp(offset+g_idx*sizeof(fileObj));
|
||||||
fout.write((char *)&fileObj,sizeof(fileObj));
|
fout.write((char *)&fileObj,sizeof(fileObj));
|
||||||
bytes+=sizeof(fileObj);
|
bytes+=sizeof(fileObj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -668,7 +668,7 @@ class BinaryIO {
|
|||||||
|
|
||||||
timer.Stop();
|
timer.Stop();
|
||||||
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
|
||||||
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
|
||||||
|
|
||||||
return csum;
|
return csum;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Include user configuration file (this can define various configuration macros)
|
// Include user configuration file (this can define various configuration macros)
|
||||||
#include <pugixml/pugiconfig.hpp>
|
#include "pugiconfig.hpp"
|
||||||
|
|
||||||
#ifndef HEADER_PUGIXML_HPP
|
#ifndef HEADER_PUGIXML_HPP
|
||||||
#define HEADER_PUGIXML_HPP
|
#define HEADER_PUGIXML_HPP
|
||||||
|
@ -55,10 +55,19 @@ namespace QCD {
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// QCD iMatrix types
|
// QCD iMatrix types
|
||||||
// Index conventions: Lorentz x Spin x Colour
|
// Index conventions: Lorentz x Spin x Colour
|
||||||
|
// note: static const int or constexpr will work for type deductions
|
||||||
|
// with the intel compiler (up to version 17)
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
static const int ColourIndex = 2;
|
#define ColourIndex 2
|
||||||
static const int SpinIndex = 1;
|
#define SpinIndex 1
|
||||||
static const int LorentzIndex= 0;
|
#define LorentzIndex 0
|
||||||
|
|
||||||
|
|
||||||
|
// Also should make these a named enum type
|
||||||
|
static const int DaggerNo=0;
|
||||||
|
static const int DaggerYes=1;
|
||||||
|
static const int InverseNo=0;
|
||||||
|
static const int InverseYes=1;
|
||||||
|
|
||||||
// Useful traits is this a spin index
|
// Useful traits is this a spin index
|
||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
|
||||||
@ -484,16 +493,27 @@ namespace QCD {
|
|||||||
} //namespace QCD
|
} //namespace QCD
|
||||||
} // Grid
|
} // Grid
|
||||||
|
|
||||||
#include <qcd/utils/SpaceTimeGrid.h>
|
|
||||||
#include <qcd/spin/Dirac.h>
|
#include <Grid/qcd/utils/SpaceTimeGrid.h>
|
||||||
#include <qcd/spin/TwoSpinor.h>
|
#include <Grid/qcd/spin/Dirac.h>
|
||||||
#include <qcd/utils/LinalgUtils.h>
|
#include <Grid/qcd/spin/TwoSpinor.h>
|
||||||
#include <qcd/utils/CovariantCshift.h>
|
#include <Grid/qcd/utils/LinalgUtils.h>
|
||||||
#include <qcd/utils/SUn.h>
|
#include <Grid/qcd/utils/CovariantCshift.h>
|
||||||
#include <qcd/action/Actions.h>
|
|
||||||
#include <qcd/hmc/integrators/Integrator.h>
|
// Include representations
|
||||||
#include <qcd/hmc/integrators/Integrator_algorithm.h>
|
#include <Grid/qcd/utils/SUn.h>
|
||||||
#include <qcd/hmc/HMC.h>
|
#include <Grid/qcd/utils/SUnAdjoint.h>
|
||||||
|
#include <Grid/qcd/utils/SUnTwoIndex.h>
|
||||||
|
#include <Grid/qcd/representations/hmc_types.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/action/Actions.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/smearing/Smearing.h>
|
||||||
|
|
||||||
|
#include <Grid/qcd/hmc/integrators/Integrator.h>
|
||||||
|
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
|
||||||
|
#include <Grid/qcd/hmc/HMC.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,86 +1,153 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/ActionBase.h
|
Source file: ./lib/qcd/action/ActionBase.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: neo <cossu@post.kek.jp>
|
Author: neo <cossu@post.kek.jp>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#ifndef QCD_ACTION_BASE
|
#ifndef QCD_ACTION_BASE
|
||||||
#define QCD_ACTION_BASE
|
#define QCD_ACTION_BASE
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD{
|
namespace QCD {
|
||||||
|
|
||||||
template<class GaugeField>
|
|
||||||
class Action {
|
|
||||||
|
|
||||||
|
template <class GaugeField>
|
||||||
|
class Action {
|
||||||
public:
|
public:
|
||||||
|
bool is_smeared = false;
|
||||||
// Boundary conditions? // Heatbath?
|
// Boundary conditions? // Heatbath?
|
||||||
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
|
virtual void refresh(const GaugeField& U,
|
||||||
virtual RealD S (const GaugeField &U) = 0; // evaluate the action
|
GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||||
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
|
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||||
virtual ~Action() {};
|
virtual void deriv(const GaugeField& U,
|
||||||
|
GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||||
|
virtual ~Action(){};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Indexing of tuple types
|
||||||
|
template <class T, class Tuple>
|
||||||
|
struct Index;
|
||||||
|
|
||||||
|
template <class T, class... Types>
|
||||||
|
struct Index<T, std::tuple<T, Types...>> {
|
||||||
|
static const std::size_t value = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T, class U, class... Types>
|
||||||
|
struct Index<T, std::tuple<U, Types...>> {
|
||||||
|
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
|
|
||||||
/*
|
/*
|
||||||
template<class GaugeField, class FermionField>
|
template <class GaugeField>
|
||||||
class PseudoFermionAction : public Action<GaugeField> {
|
struct ActionLevel {
|
||||||
public:
|
public:
|
||||||
FermionField Phi;
|
typedef Action<GaugeField>*
|
||||||
GridParallelRNG &pRNG;
|
ActPtr; // now force the same colours as the rest of the code
|
||||||
GridBase &Grid;
|
|
||||||
|
|
||||||
PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) {
|
//Add supported representations here
|
||||||
};
|
|
||||||
|
|
||||||
virtual void refresh(const GaugeField &gauge) {
|
|
||||||
gaussian(Phi,pRNG);
|
|
||||||
};
|
|
||||||
|
|
||||||
};
|
unsigned int multiplier;
|
||||||
*/
|
|
||||||
|
|
||||||
template<class GaugeField> struct ActionLevel{
|
|
||||||
public:
|
|
||||||
|
|
||||||
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
|
|
||||||
|
|
||||||
int multiplier;
|
|
||||||
|
|
||||||
std::vector<ActPtr> actions;
|
std::vector<ActPtr> actions;
|
||||||
|
|
||||||
ActionLevel(int mul = 1) : multiplier(mul) {
|
ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
|
||||||
assert (mul > 0);
|
assert(mul >= 1);
|
||||||
};
|
};
|
||||||
|
|
||||||
void push_back(ActPtr ptr){
|
void push_back(ActPtr ptr) { actions.push_back(ptr); }
|
||||||
actions.push_back(ptr);
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <class GaugeField, class Repr = NoHirep >
|
||||||
|
struct ActionLevel {
|
||||||
|
public:
|
||||||
|
unsigned int multiplier;
|
||||||
|
|
||||||
|
// Fundamental repr actions separated because of the smearing
|
||||||
|
typedef Action<GaugeField>* ActPtr;
|
||||||
|
|
||||||
|
// construct a tuple of vectors of the actions for the corresponding higher
|
||||||
|
// representation fields
|
||||||
|
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
|
||||||
|
action_collection actions_hirep;
|
||||||
|
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
|
||||||
|
|
||||||
|
std::vector<ActPtr>& actions;
|
||||||
|
|
||||||
|
// Temporary conversion between ActionLevel and ActionLevelHirep
|
||||||
|
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){}
|
||||||
|
|
||||||
|
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
|
||||||
|
// initialize the hirep vectors to zero.
|
||||||
|
//apply(this->resize, actions_hirep, 0); //need a working resize
|
||||||
|
assert(mul >= 1);
|
||||||
|
};
|
||||||
|
|
||||||
|
//void push_back(ActPtr ptr) { actions.push_back(ptr); }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template < class Field >
|
||||||
|
void push_back(Action<Field>* ptr) {
|
||||||
|
// insert only in the correct vector
|
||||||
|
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template < class ActPtr>
|
||||||
|
static void resize(ActPtr ap, unsigned int n){
|
||||||
|
ap->resize(n);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//template <std::size_t I>
|
||||||
|
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
|
||||||
|
|
||||||
|
// Loop on tuple for a callable function
|
||||||
|
template <std::size_t I = 1, typename Callable, typename ...Args>
|
||||||
|
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
|
||||||
|
Callable, Repr& R,Args&...) const {}
|
||||||
|
|
||||||
|
template <std::size_t I = 1, typename Callable, typename ...Args>
|
||||||
|
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
|
||||||
|
Callable fn, Repr& R, Args&... arguments) const {
|
||||||
|
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
|
||||||
|
apply<I + 1>(fn, R, arguments...);
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
|
|
||||||
|
|
||||||
|
//template <class GaugeField>
|
||||||
|
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
|
||||||
|
|
||||||
}}
|
template <class GaugeField, class R>
|
||||||
|
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -40,25 +40,25 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Abstract base interface
|
// Abstract base interface
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/ActionBase.h>
|
#include <Grid/qcd/action/ActionBase.h>
|
||||||
#include <qcd/action/ActionParams.h>
|
#include <Grid/qcd/action/ActionParams.h>
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Utility functions
|
// Utility functions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/gauge/GaugeImpl.h>
|
#include <Grid/qcd/action/gauge/GaugeImpl.h>
|
||||||
#include <qcd/utils/WilsonLoops.h>
|
#include <Grid/qcd/utils/WilsonLoops.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
|
#include <Grid/qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
|
||||||
#include <qcd/action/fermion/FermionOperatorImpl.h>
|
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h>
|
||||||
#include <qcd/action/fermion/FermionOperator.h>
|
#include <Grid/qcd/action/fermion/FermionOperator.h>
|
||||||
#include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Gauge Actions
|
// Gauge Actions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
#include <qcd/action/gauge/WilsonGaugeAction.h>
|
#include <Grid/qcd/action/gauge/WilsonGaugeAction.h>
|
||||||
#include <qcd/action/gauge/PlaqPlusRectangleAction.h>
|
#include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
@ -107,41 +107,64 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
|
|||||||
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
|
// for EVERY .cc file. This define centralises the list and restores global push of impl cases
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define FermOpTemplateInstantiate(A) \
|
|
||||||
|
#define FermOp4dVecTemplateInstantiate(A) \
|
||||||
template class A<WilsonImplF>; \
|
template class A<WilsonImplF>; \
|
||||||
template class A<WilsonImplD>; \
|
template class A<WilsonImplD>; \
|
||||||
|
template class A<ZWilsonImplF>; \
|
||||||
|
template class A<ZWilsonImplD>; \
|
||||||
template class A<GparityWilsonImplF>; \
|
template class A<GparityWilsonImplF>; \
|
||||||
template class A<GparityWilsonImplD>;
|
template class A<GparityWilsonImplD>;
|
||||||
|
|
||||||
|
#define AdjointFermOpTemplateInstantiate(A) \
|
||||||
|
template class A<WilsonAdjImplF>; \
|
||||||
|
template class A<WilsonAdjImplD>;
|
||||||
|
|
||||||
|
#define TwoIndexFermOpTemplateInstantiate(A) \
|
||||||
|
template class A<WilsonTwoIndexSymmetricImplF>; \
|
||||||
|
template class A<WilsonTwoIndexSymmetricImplD>;
|
||||||
|
|
||||||
|
#define FermOp5dVecTemplateInstantiate(A) \
|
||||||
|
template class A<DomainWallVec5dImplF>; \
|
||||||
|
template class A<DomainWallVec5dImplD>; \
|
||||||
|
template class A<ZDomainWallVec5dImplF>; \
|
||||||
|
template class A<ZDomainWallVec5dImplD>;
|
||||||
|
|
||||||
|
#define FermOpTemplateInstantiate(A) \
|
||||||
|
FermOp4dVecTemplateInstantiate(A) \
|
||||||
|
FermOp5dVecTemplateInstantiate(A)
|
||||||
|
|
||||||
|
|
||||||
#define GparityFermOpTemplateInstantiate(A)
|
#define GparityFermOpTemplateInstantiate(A)
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Fermion operators / actions
|
// Fermion operators / actions
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
|
|
||||||
#include <qcd/action/fermion/WilsonFermion.h> // 4d wilson like
|
#include <Grid/qcd/action/fermion/WilsonFermion.h> // 4d wilson like
|
||||||
#include <qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
|
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
|
||||||
#include <qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
|
||||||
|
|
||||||
//#include <qcd/action/fermion/CloverFermion.h>
|
//#include <Grid/qcd/action/fermion/CloverFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/CayleyFermion5D.h> // Cayley types
|
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types
|
||||||
#include <qcd/action/fermion/DomainWallFermion.h>
|
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <qcd/action/fermion/DomainWallFermion.h>
|
#include <Grid/qcd/action/fermion/DomainWallFermion.h>
|
||||||
#include <qcd/action/fermion/MobiusFermion.h>
|
#include <Grid/qcd/action/fermion/MobiusFermion.h>
|
||||||
#include <qcd/action/fermion/ScaledShamirFermion.h>
|
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
|
||||||
#include <qcd/action/fermion/MobiusZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
|
||||||
#include <qcd/action/fermion/ShamirZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
|
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
|
||||||
|
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
|
// More maintainable to maintain the following typedef list centrally, as more "impl" targets
|
||||||
@ -157,6 +180,14 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
|
|||||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
|
||||||
|
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
|
||||||
|
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonTwoIndexSymmetricImplR> WilsonTwoIndexSymmetricFermionR;
|
||||||
|
typedef WilsonFermion<WilsonTwoIndexSymmetricImplF> WilsonTwoIndexSymmetricFermionF;
|
||||||
|
typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermionD;
|
||||||
|
|
||||||
typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR;
|
typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR;
|
||||||
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
|
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
|
||||||
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
|
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
|
||||||
@ -167,6 +198,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
|
|||||||
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
typedef MobiusFermion<WilsonImplR> MobiusFermionR;
|
||||||
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
typedef MobiusFermion<WilsonImplF> MobiusFermionF;
|
||||||
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
typedef MobiusFermion<WilsonImplD> MobiusFermionD;
|
||||||
|
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
|
||||||
|
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
|
||||||
|
|
||||||
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
|
||||||
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
|
||||||
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
|
||||||
@ -222,21 +258,21 @@ typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
#include <qcd/action/fermion/g5HermitianLinop.h>
|
#include <Grid/qcd/action/fermion/g5HermitianLinop.h>
|
||||||
|
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
// Pseudo fermion combinations for HMC
|
// Pseudo fermion combinations for HMC
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
#include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
|
#include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
|
||||||
|
|
||||||
#include <qcd/action/pseudofermion/TwoFlavour.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavour.h>
|
||||||
#include <qcd/action/pseudofermion/TwoFlavourRatio.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h>
|
||||||
#include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
|
||||||
#include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
|
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
|
||||||
|
|
||||||
#include <qcd/action/pseudofermion/OneFlavourRational.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourRational.h>
|
||||||
#include <qcd/action/pseudofermion/OneFlavourRationalRatio.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h>
|
||||||
#include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
|
||||||
#include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
|
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -28,7 +28,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
@ -45,486 +48,376 @@ namespace QCD {
|
|||||||
FourDimGrid,
|
FourDimGrid,
|
||||||
FourDimRedBlackGrid,_M5,p),
|
FourDimRedBlackGrid,_M5,p),
|
||||||
mass(_mass)
|
mass(_mass)
|
||||||
{
|
{ }
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
void CayleyFermion5D<Impl>::Dminus(const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
// Assemble Din
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
FermionField tmp(psi._grid);
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
if ( s==0 ) {
|
this->DW(psi,tmp,DaggerNo);
|
||||||
// Din = bs psi[s] + cs[s] psi[s+1}
|
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
for(int s=0;s<Ls;s++){
|
||||||
// Din+= -mass*cs[s] psi[s+1}
|
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],tmp,s,s);// chi = (1-c[s] D_W) psi
|
||||||
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
|
||||||
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
template<class Impl>
|
}
|
||||||
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
template<class Impl>
|
||||||
{
|
void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi)
|
||||||
int Ls=this->Ls;
|
{
|
||||||
for(int s=0;s<Ls;s++){
|
int Ls=this->Ls;
|
||||||
if ( s==0 ) {
|
FermionField tmp(psi._grid);
|
||||||
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
|
this->DW(psi,tmp,DaggerYes);
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
|
for(int s=0;s<Ls;s++){
|
||||||
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
|
axpby_ssp(chi,Coeff_t(1.0),psi,-cs[s],tmp,s,s);// chi = (1-c[s] D_W) psi
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// override multiply
|
template<class Impl>
|
||||||
template<class Impl>
|
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
||||||
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
{
|
||||||
{
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
std::vector<Coeff_t> diag (Ls,1.0);
|
||||||
|
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
|
||||||
FermionField Din(psi._grid);
|
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
|
||||||
|
M5D(psi,chi,chi,lower,diag,upper);
|
||||||
// Assemble Din
|
}
|
||||||
/*
|
template<class Impl>
|
||||||
for(int s=0;s<Ls;s++){
|
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
|
||||||
if ( s==0 ) {
|
{
|
||||||
// Din = bs psi[s] + cs[s] psi[s+1}
|
int Ls=this->Ls;
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
std::vector<Coeff_t> diag = bs;
|
||||||
// Din+= -mass*cs[s] psi[s+1}
|
std::vector<Coeff_t> upper= cs;
|
||||||
axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
|
std::vector<Coeff_t> lower= cs;
|
||||||
} else if ( s==(Ls-1)) {
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
|
lower[0] =-mass*lower[0];
|
||||||
axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
|
M5D(psi,psi,Din,lower,diag,upper);
|
||||||
} else {
|
}
|
||||||
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
|
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
|
||||||
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
|
{
|
||||||
}
|
int Ls=this->Ls;
|
||||||
}
|
std::vector<Coeff_t> diag = beo;
|
||||||
*/
|
std::vector<Coeff_t> upper(Ls);
|
||||||
Meooe5D(psi,Din);
|
std::vector<Coeff_t> lower(Ls);
|
||||||
|
for(int i=0;i<Ls;i++) {
|
||||||
this->DW(Din,chi,DaggerNo);
|
upper[i]=-ceo[i];
|
||||||
// ((b D_W + D_w hop terms +1) on s-diag
|
lower[i]=-ceo[i];
|
||||||
axpby(chi,1.0,1.0,chi,psi);
|
|
||||||
|
|
||||||
// Call Mooee??
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
if ( s==0 ){
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return norm2(chi);
|
|
||||||
}
|
}
|
||||||
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
template<class Impl>
|
lower[0] =-mass*lower[0];
|
||||||
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
M5D(psi,psi,chi,lower,diag,upper);
|
||||||
{
|
}
|
||||||
// Under adjoint
|
template<class Impl>
|
||||||
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
||||||
//D2- P+ D2+ P-D1-^dag D2+dag
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
FermionField Din(psi._grid);
|
std::vector<Coeff_t> diag = bee;
|
||||||
// Apply Dw
|
std::vector<Coeff_t> upper(Ls);
|
||||||
this->DW(psi,Din,DaggerYes);
|
std::vector<Coeff_t> lower(Ls);
|
||||||
|
for(int i=0;i<Ls;i++) {
|
||||||
MeooeDag5D(Din,chi);
|
upper[i]=-cee[i];
|
||||||
|
lower[i]=-cee[i];
|
||||||
int Ls=this->Ls;
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
|
|
||||||
// Collect the terms in DW
|
|
||||||
// Chi = bs Din[s] + cs[s] Din[s+1}
|
|
||||||
// Chi+= -mass*cs[s] psi[s+1}
|
|
||||||
/*
|
|
||||||
if ( s==0 ) {
|
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// FIXME just call MooeeDag??
|
|
||||||
|
|
||||||
// Collect the terms indept of DW
|
|
||||||
if ( s==0 ){
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// ((b D_W + D_w hop terms +1) on s-diag
|
|
||||||
axpby (chi,1.0,1.0,chi,psi);
|
|
||||||
return norm2(chi);
|
|
||||||
}
|
}
|
||||||
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
|
lower[0] =-mass*lower[0];
|
||||||
|
M5D(psi,psi,chi,lower,diag,upper);
|
||||||
|
}
|
||||||
|
|
||||||
// half checkerboard operations
|
template<class Impl>
|
||||||
template<class Impl>
|
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
||||||
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
{
|
||||||
{
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
std::vector<Coeff_t> diag = bee;
|
||||||
|
std::vector<Coeff_t> upper(Ls);
|
||||||
|
std::vector<Coeff_t> lower(Ls);
|
||||||
|
|
||||||
FermionField tmp(psi._grid);
|
for (int s=0;s<Ls;s++){
|
||||||
// Assemble the 5d matrix
|
// Assemble the 5d matrix
|
||||||
Meooe5D(psi,tmp);
|
if ( s==0 ) {
|
||||||
#if 0
|
upper[s] = -cee[s+1] ;
|
||||||
std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
|
lower[s] = mass*cee[Ls-1];
|
||||||
for(int s=0;s<Ls;s++){
|
} else if ( s==(Ls-1)) {
|
||||||
if ( s==0 ) {
|
upper[s] = mass*cee[0];
|
||||||
// tmp = bs psi[s] + cs[s] psi[s+1}
|
lower[s] = -cee[s-1];
|
||||||
// tmp+= -mass*cs[s] psi[s+1}
|
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
|
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
|
|
||||||
} else if ( s==(Ls-1)) {
|
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Apply 4d dslash
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
|
||||||
this->DhopEO(tmp,chi,DaggerNo);
|
|
||||||
} else {
|
} else {
|
||||||
this->DhopOE(tmp,chi,DaggerNo);
|
upper[s]=-cee[s+1];
|
||||||
|
lower[s]=-cee[s-1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
M5Ddag(psi,psi,chi,lower,diag,upper);
|
||||||
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
}
|
||||||
{
|
|
||||||
FermionField tmp(psi._grid);
|
|
||||||
// Apply 4d dslash
|
|
||||||
if ( psi.checkerboard == Odd ) {
|
|
||||||
this->DhopEO(psi,tmp,DaggerYes);
|
|
||||||
} else {
|
|
||||||
this->DhopOE(psi,tmp,DaggerYes);
|
|
||||||
}
|
|
||||||
|
|
||||||
MeooeDag5D(tmp,chi);
|
template<class Impl>
|
||||||
#if 0
|
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
|
||||||
std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
|
{
|
||||||
// Assemble the 5d matrix
|
int Ls=this->Ls;
|
||||||
int Ls=this->Ls;
|
std::vector<Coeff_t> diag(Ls,1.0);
|
||||||
for(int s=0;s<Ls;s++){
|
std::vector<Coeff_t> upper(Ls,-1.0);
|
||||||
if ( s==0 ) {
|
std::vector<Coeff_t> lower(Ls,-1.0);
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp, -ceo[s+1] ,tmp,s,s+1);
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
axpby_ssp_pminus(chi, 1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1);
|
lower[0] =-mass*lower[0];
|
||||||
} else if ( s==(Ls-1)) {
|
M5Ddag(psi,chi,chi,lower,diag,upper);
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0);
|
}
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0 ,chi,-ceo[s-1],tmp,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
std::vector<Coeff_t> diag =bs;
|
||||||
|
std::vector<Coeff_t> upper=cs;
|
||||||
|
std::vector<Coeff_t> lower=cs;
|
||||||
|
upper[Ls-1]=-mass*upper[Ls-1];
|
||||||
|
lower[0] =-mass*lower[0];
|
||||||
|
M5Ddag(psi,psi,Din,lower,diag,upper);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
FermionField Din(psi._grid);
|
||||||
|
|
||||||
|
// Assemble Din
|
||||||
|
Meooe5D(psi,Din);
|
||||||
|
|
||||||
|
this->DW(Din,chi,DaggerNo);
|
||||||
|
// ((b D_W + D_w hop terms +1) on s-diag
|
||||||
|
axpby(chi,1.0,1.0,chi,psi);
|
||||||
|
|
||||||
|
M5D(psi,chi);
|
||||||
|
return(norm2(chi));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
// Under adjoint
|
||||||
|
//D1+ D1- P- -> D1+^dag P+ D2-^dag
|
||||||
|
//D2- P+ D2+ P-D1-^dag D2+dag
|
||||||
|
|
||||||
|
FermionField Din(psi._grid);
|
||||||
|
// Apply Dw
|
||||||
|
this->DW(psi,Din,DaggerYes);
|
||||||
|
|
||||||
|
MeooeDag5D(Din,chi);
|
||||||
|
|
||||||
|
M5Ddag(psi,chi);
|
||||||
|
// ((b D_W + D_w hop terms +1) on s-diag
|
||||||
|
axpby (chi,1.0,1.0,chi,psi);
|
||||||
|
return norm2(chi);
|
||||||
|
}
|
||||||
|
|
||||||
|
// half checkerboard operations
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
FermionField tmp(psi._grid);
|
||||||
|
|
||||||
|
Meooe5D(psi,tmp);
|
||||||
|
|
||||||
|
if ( psi.checkerboard == Odd ) {
|
||||||
|
this->DhopEO(tmp,chi,DaggerNo);
|
||||||
|
} else {
|
||||||
|
this->DhopOE(tmp,chi,DaggerNo);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
FermionField tmp(psi._grid);
|
||||||
for (int s=0;s<Ls;s++){
|
// Apply 4d dslash
|
||||||
if ( s==0 ) {
|
if ( psi.checkerboard == Odd ) {
|
||||||
axpby_ssp_pminus(chi,bee[s],psi ,-cee[s],psi,s,s+1);
|
this->DhopEO(psi,tmp,DaggerYes);
|
||||||
axpby_ssp_pplus (chi,1.0,chi,mass*cee[s],psi,s,Ls-1);
|
} else {
|
||||||
} else if ( s==(Ls-1)) {
|
this->DhopOE(psi,tmp,DaggerYes);
|
||||||
axpby_ssp_pminus(chi,bee[s],psi,mass*cee[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pminus(chi,bee[s],psi,-cee[s],psi,s,s+1);
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-cee[s],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
MeooeDag5D(tmp,chi);
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
|
||||||
int Ls=this->Ls;
|
FermionField tmp(psi._grid);
|
||||||
FermionField tmp(psi._grid);
|
Meo5D(psi,tmp);
|
||||||
// Assemble the 5d matrix
|
// Apply 4d dslash fragment
|
||||||
for(int s=0;s<Ls;s++){
|
this->DhopDir(tmp,chi,dir,disp);
|
||||||
if ( s==0 ) {
|
}
|
||||||
// tmp = bs psi[s] + cs[s] psi[s+1}
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
// tmp+= -mass*cs[s] psi[s+1}
|
template<class Impl>
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
|
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
|
{
|
||||||
} else if ( s==(Ls-1)) {
|
FermionField Din(V._grid);
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
|
|
||||||
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
if ( dag == DaggerNo ) {
|
||||||
} else {
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
|
Meooe5D(V,Din);
|
||||||
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
|
this->DhopDeriv(mat,U,Din,dag);
|
||||||
}
|
} else {
|
||||||
}
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
// Apply 4d dslash fragment
|
Meooe5D(U,Din);
|
||||||
this->DhopDir(tmp,chi,dir,disp);
|
this->DhopDeriv(mat,Din,V,dag);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
{
|
{
|
||||||
int Ls=this->Ls;
|
FermionField Din(V._grid);
|
||||||
for (int s=0;s<Ls;s++){
|
|
||||||
// Assemble the 5d matrix
|
if ( dag == DaggerNo ) {
|
||||||
if ( s==0 ) {
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1] ,psi,s,s+1);
|
Meooe5D(V,Din);
|
||||||
axpby_ssp_pminus(chi,1.0,chi,mass*cee[Ls-1],psi,s,Ls-1);
|
this->DhopDerivOE(mat,U,Din,dag);
|
||||||
} else if ( s==(Ls-1)) {
|
} else {
|
||||||
axpby_ssp_pplus(chi,bee[s],psi,mass*cee[0],psi,s,0);
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-cee[s-1],psi,s,s-1);
|
|
||||||
} else {
|
|
||||||
axpby_ssp_pplus(chi,bee[s],psi,-cee[s+1],psi,s,s+1);
|
|
||||||
axpby_ssp_pminus(chi,1.0 ,chi,-cee[s-1],psi,s,s-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
|
||||||
{
|
|
||||||
int Ls=this->Ls;
|
|
||||||
// Apply (L^{\prime})^{-1}
|
|
||||||
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
|
||||||
for (int s=1;s<Ls;s++){
|
|
||||||
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
|
||||||
}
|
|
||||||
// L_m^{-1}
|
|
||||||
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
|
||||||
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
|
|
||||||
}
|
|
||||||
// U_m^{-1} D^{-1}
|
|
||||||
for (int s=0;s<Ls-1;s++){
|
|
||||||
// Chi[s] + 1/d chi[s]
|
|
||||||
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
|
|
||||||
}
|
|
||||||
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
|
||||||
|
|
||||||
// Apply U^{-1}
|
|
||||||
for (int s=Ls-2;s>=0;s--){
|
|
||||||
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
|
||||||
{
|
|
||||||
int Ls=this->Ls;
|
|
||||||
// Apply (U^{\prime})^{-dagger}
|
|
||||||
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
|
||||||
for (int s=1;s<Ls;s++){
|
|
||||||
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
|
|
||||||
}
|
|
||||||
// U_m^{-\dagger}
|
|
||||||
for (int s=0;s<Ls-1;s++){
|
|
||||||
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
|
|
||||||
}
|
|
||||||
// L_m^{-\dagger} D^{-dagger}
|
|
||||||
for (int s=0;s<Ls-1;s++){
|
|
||||||
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
|
|
||||||
}
|
|
||||||
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
|
||||||
|
|
||||||
// Apply L^{-dagger}
|
|
||||||
for (int s=Ls-2;s>=0;s--){
|
|
||||||
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// force terms; five routines; default to Dhop on diagonal
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
FermionField Din(V._grid);
|
|
||||||
|
|
||||||
if ( dag == DaggerNo ) {
|
|
||||||
// U d/du [D_w D5] V = U d/du DW D5 V
|
|
||||||
Meooe5D(V,Din);
|
|
||||||
this->DhopDeriv(mat,U,Din,dag);
|
|
||||||
} else {
|
|
||||||
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
|
||||||
Meooe5D(U,Din);
|
|
||||||
this->DhopDeriv(mat,Din,V,dag);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
FermionField Din(V._grid);
|
|
||||||
|
|
||||||
if ( dag == DaggerNo ) {
|
|
||||||
// U d/du [D_w D5] V = U d/du DW D5 V
|
|
||||||
Meooe5D(V,Din);
|
|
||||||
this->DhopDerivOE(mat,U,Din,dag);
|
|
||||||
} else {
|
|
||||||
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
|
||||||
Meooe5D(U,Din);
|
Meooe5D(U,Din);
|
||||||
this->DhopDerivOE(mat,Din,V,dag);
|
this->DhopDerivOE(mat,Din,V,dag);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
||||||
{
|
{
|
||||||
FermionField Din(V._grid);
|
FermionField Din(V._grid);
|
||||||
|
|
||||||
if ( dag == DaggerNo ) {
|
|
||||||
// U d/du [D_w D5] V = U d/du DW D5 V
|
|
||||||
Meooe5D(V,Din);
|
|
||||||
this->DhopDerivEO(mat,U,Din,dag);
|
|
||||||
} else {
|
|
||||||
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
|
||||||
Meooe5D(U,Din);
|
|
||||||
this->DhopDerivEO(mat,Din,V,dag);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Tanh
|
if ( dag == DaggerNo ) {
|
||||||
template<class Impl>
|
// U d/du [D_w D5] V = U d/du DW D5 V
|
||||||
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
Meooe5D(V,Din);
|
||||||
{
|
this->DhopDerivEO(mat,U,Din,dag);
|
||||||
SetCoefficientsZolotarev(1.0,zdata,b,c);
|
} else {
|
||||||
|
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
|
||||||
|
Meooe5D(U,Din);
|
||||||
|
this->DhopDerivEO(mat,Din,V,dag);
|
||||||
}
|
}
|
||||||
//Zolo
|
};
|
||||||
template<class Impl>
|
|
||||||
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
// Tanh
|
||||||
{
|
template<class Impl>
|
||||||
int Ls=this->Ls;
|
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
std::vector<Coeff_t> gamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
|
||||||
|
SetCoefficientsInternal(1.0,gamma,b,c);
|
||||||
|
}
|
||||||
|
//Zolo
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
std::vector<Coeff_t> gamma(this->Ls);
|
||||||
|
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
|
||||||
|
SetCoefficientsInternal(zolo_hi,gamma,b,c);
|
||||||
|
}
|
||||||
|
//Zolo
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// The Cayley coeffs (unprec)
|
// The Cayley coeffs (unprec)
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
omega.resize(Ls);
|
omega.resize(Ls);
|
||||||
bs.resize(Ls);
|
bs.resize(Ls);
|
||||||
cs.resize(Ls);
|
cs.resize(Ls);
|
||||||
as.resize(Ls);
|
as.resize(Ls);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
|
||||||
|
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
||||||
|
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
|
||||||
|
//
|
||||||
|
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
|
||||||
|
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
|
||||||
|
//
|
||||||
|
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
|
||||||
|
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
|
||||||
|
//
|
||||||
|
// So
|
||||||
|
//
|
||||||
|
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
|
||||||
|
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
||||||
|
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
||||||
|
//
|
||||||
|
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
|
||||||
|
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
|
||||||
|
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
||||||
|
//
|
||||||
|
|
||||||
//
|
double bpc = b+c;
|
||||||
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
|
double bmc = b-c;
|
||||||
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
for(int i=0; i < Ls; i++){
|
||||||
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
|
as[i] = 1.0;
|
||||||
//
|
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
|
||||||
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
|
bs[i] = 0.5*(bpc/omega[i] + bmc);
|
||||||
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
|
cs[i] = 0.5*(bpc/omega[i] - bmc);
|
||||||
//
|
}
|
||||||
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
|
|
||||||
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
|
////////////////////////////////////////////////////////
|
||||||
//
|
// Constants for the preconditioned matrix Cayley form
|
||||||
// So
|
////////////////////////////////////////////////////////
|
||||||
//
|
bee.resize(Ls);
|
||||||
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
|
cee.resize(Ls);
|
||||||
// -(g5 ------- -1 ) ( g5 --------- + 1 )
|
beo.resize(Ls);
|
||||||
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
ceo.resize(Ls);
|
||||||
//
|
|
||||||
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
|
for(int i=0;i<Ls;i++){
|
||||||
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
|
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
|
||||||
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
|
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
|
||||||
//
|
beo[i]=as[i]*bs[i];
|
||||||
|
ceo[i]=-as[i]*cs[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
aee.resize(Ls);
|
||||||
|
aeo.resize(Ls);
|
||||||
|
for(int i=0;i<Ls;i++){
|
||||||
|
aee[i]=cee[i];
|
||||||
|
aeo[i]=ceo[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// LDU decomposition of eeoo
|
||||||
|
//////////////////////////////////////////
|
||||||
|
dee.resize(Ls);
|
||||||
|
lee.resize(Ls);
|
||||||
|
leem.resize(Ls);
|
||||||
|
uee.resize(Ls);
|
||||||
|
ueem.resize(Ls);
|
||||||
|
|
||||||
|
for(int i=0;i<Ls;i++){
|
||||||
|
|
||||||
double bpc = b+c;
|
dee[i] = bee[i];
|
||||||
double bmc = b-c;
|
|
||||||
for(int i=0; i < Ls; i++){
|
|
||||||
as[i] = 1.0;
|
|
||||||
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
|
|
||||||
bs[i] = 0.5*(bpc/omega[i] + bmc);
|
|
||||||
cs[i] = 0.5*(bpc/omega[i] - bmc);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
// Constants for the preconditioned matrix Cayley form
|
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
bee.resize(Ls);
|
|
||||||
cee.resize(Ls);
|
|
||||||
beo.resize(Ls);
|
|
||||||
ceo.resize(Ls);
|
|
||||||
|
|
||||||
for(int i=0;i<Ls;i++){
|
if ( i < Ls-1 ) {
|
||||||
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
|
|
||||||
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
|
|
||||||
beo[i]=as[i]*bs[i];
|
|
||||||
ceo[i]=-as[i]*cs[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
aee.resize(Ls);
|
|
||||||
aeo.resize(Ls);
|
|
||||||
for(int i=0;i<Ls;i++){
|
|
||||||
aee[i]=cee[i];
|
|
||||||
aeo[i]=ceo[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////
|
|
||||||
// LDU decomposition of eeoo
|
|
||||||
//////////////////////////////////////////
|
|
||||||
dee.resize(Ls);
|
|
||||||
lee.resize(Ls);
|
|
||||||
leem.resize(Ls);
|
|
||||||
uee.resize(Ls);
|
|
||||||
ueem.resize(Ls);
|
|
||||||
|
|
||||||
for(int i=0;i<Ls;i++){
|
|
||||||
|
|
||||||
dee[i] = bee[i];
|
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
|
||||||
|
|
||||||
if ( i < Ls-1 ) {
|
leem[i]=mass*cee[Ls-1]/bee[0];
|
||||||
|
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
|
||||||
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
|
|
||||||
|
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
|
||||||
leem[i]=mass*cee[Ls-1]/bee[0];
|
|
||||||
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
|
ueem[i]=mass;
|
||||||
|
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
|
||||||
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
|
ueem[i]*= aee[0]/bee[0];
|
||||||
|
|
||||||
ueem[i]=mass;
|
} else {
|
||||||
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
|
lee[i] =0.0;
|
||||||
ueem[i]*= aee[0]/bee[0];
|
leem[i]=0.0;
|
||||||
|
uee[i] =0.0;
|
||||||
} else {
|
ueem[i]=0.0;
|
||||||
lee[i] =0.0;
|
|
||||||
leem[i]=0.0;
|
|
||||||
uee[i] =0.0;
|
|
||||||
ueem[i]=0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
double delta_d=mass*cee[Ls-1];
|
|
||||||
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
|
||||||
dee[Ls-1] += delta_d;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Coeff_t delta_d=mass*cee[Ls-1];
|
||||||
|
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
|
||||||
|
dee[Ls-1] += delta_d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(CayleyFermion5D);
|
FermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
||||||
|
@ -51,6 +51,32 @@ namespace Grid {
|
|||||||
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
virtual void MooeeDag (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
virtual void MooeeInv (const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
virtual void MooeeInvDag (const FermionField &in, FermionField &out);
|
||||||
|
virtual void Meo5D (const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
virtual void M5D (const FermionField &psi, FermionField &chi);
|
||||||
|
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
virtual void Dminus(const FermionField &psi, FermionField &chi);
|
||||||
|
virtual void DminusDag(const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
// Instantiate different versions depending on Impl
|
||||||
|
/////////////////////////////////////////////////////
|
||||||
|
void M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper);
|
||||||
|
|
||||||
|
void M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper);
|
||||||
|
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
|
||||||
|
|
||||||
virtual void Instantiatable(void)=0;
|
virtual void Instantiatable(void)=0;
|
||||||
|
|
||||||
// force terms; five routines; default to Dhop on diagonal
|
// force terms; five routines; default to Dhop on diagonal
|
||||||
@ -68,23 +94,23 @@ namespace Grid {
|
|||||||
RealD mass;
|
RealD mass;
|
||||||
|
|
||||||
// Cayley form Moebius (tanh and zolotarev)
|
// Cayley form Moebius (tanh and zolotarev)
|
||||||
std::vector<RealD> omega;
|
std::vector<Coeff_t> omega;
|
||||||
std::vector<RealD> bs; // S dependent coeffs
|
std::vector<Coeff_t> bs; // S dependent coeffs
|
||||||
std::vector<RealD> cs;
|
std::vector<Coeff_t> cs;
|
||||||
std::vector<RealD> as;
|
std::vector<Coeff_t> as;
|
||||||
// For preconditioning Cayley form
|
// For preconditioning Cayley form
|
||||||
std::vector<RealD> bee;
|
std::vector<Coeff_t> bee;
|
||||||
std::vector<RealD> cee;
|
std::vector<Coeff_t> cee;
|
||||||
std::vector<RealD> aee;
|
std::vector<Coeff_t> aee;
|
||||||
std::vector<RealD> beo;
|
std::vector<Coeff_t> beo;
|
||||||
std::vector<RealD> ceo;
|
std::vector<Coeff_t> ceo;
|
||||||
std::vector<RealD> aeo;
|
std::vector<Coeff_t> aeo;
|
||||||
// LDU factorisation of the eeoo matrix
|
// LDU factorisation of the eeoo matrix
|
||||||
std::vector<RealD> lee;
|
std::vector<Coeff_t> lee;
|
||||||
std::vector<RealD> leem;
|
std::vector<Coeff_t> leem;
|
||||||
std::vector<RealD> uee;
|
std::vector<Coeff_t> uee;
|
||||||
std::vector<RealD> ueem;
|
std::vector<Coeff_t> ueem;
|
||||||
std::vector<RealD> dee;
|
std::vector<Coeff_t> dee;
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
CayleyFermion5D(GaugeField &_Umu,
|
CayleyFermion5D(GaugeField &_Umu,
|
||||||
@ -94,12 +120,24 @@ namespace Grid {
|
|||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
|
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#define INSTANTIATE_DPERP(A)\
|
||||||
|
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
||||||
|
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
|
||||||
|
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
|
||||||
|
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
|
||||||
|
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
|
||||||
|
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
|
||||||
|
|
||||||
|
#define CAYLEY_DPERP_CACHE
|
||||||
|
#undef CAYLEY_DPERP_LINALG
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
211
lib/qcd/action/fermion/CayleyFermion5Dcache.cc
Normal file
211
lib/qcd/action/fermion/CayleyFermion5Dcache.cc
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Dcache.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||||
|
|
||||||
|
// Pminus forwards
|
||||||
|
// Pplus backwards..
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls =this->Ls;
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
if ( s==0 ) {
|
||||||
|
spProj5m(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5p(tmp,psi._odata[ss+Ls-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
spProj5m(tmp,psi._odata[ss+0]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5p(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else {
|
||||||
|
spProj5m(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5p(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls =this->Ls;
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
spProj5p(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5m(tmp,psi._odata[ss+Ls-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
spProj5p(tmp,psi._odata[ss+0]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5m(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
} else {
|
||||||
|
spProj5p(tmp,psi._odata[ss+s+1]);
|
||||||
|
chi[ss+s]=diag[s]*phi[ss+s]+upper[s]*tmp;
|
||||||
|
|
||||||
|
spProj5m(tmp,psi._odata[ss+s-1]);
|
||||||
|
chi[ss+s]=chi[ss+s]+lower[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
|
||||||
|
// Apply (L^{\prime})^{-1}
|
||||||
|
chi[ss]=psi[ss]; // chi[0]=psi[0]
|
||||||
|
for(int s=1;s<Ls;s++){
|
||||||
|
spProj5p(tmp,chi[ss+s-1]);
|
||||||
|
chi[ss+s] = psi[ss+s]-lee[s-1]*tmp;
|
||||||
|
}
|
||||||
|
// L_m^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||||
|
spProj5m(tmp,chi[ss+s]);
|
||||||
|
chi[ss+Ls-1] = chi[ss+Ls-1] - leem[s]*tmp;
|
||||||
|
}
|
||||||
|
// U_m^{-1} D^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
// Chi[s] + 1/d chi[s]
|
||||||
|
spProj5p(tmp,chi[ss+Ls-1]);
|
||||||
|
chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(ueem[s]/dee[Ls-1])*tmp;
|
||||||
|
}
|
||||||
|
chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
|
||||||
|
|
||||||
|
// Apply U^{-1}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
spProj5m(tmp,chi[ss+s+1]);
|
||||||
|
chi[ss+s] = chi[ss+s] - uee[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
|
||||||
|
assert(psi.checkerboard == psi.checkerboard);
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
|
||||||
|
|
||||||
|
auto tmp = psi._odata[0];
|
||||||
|
|
||||||
|
// Apply (U^{\prime})^{-dagger}
|
||||||
|
chi[ss]=psi[ss];
|
||||||
|
for (int s=1;s<Ls;s++){
|
||||||
|
spProj5m(tmp,chi[ss+s-1]);
|
||||||
|
chi[ss+s] = psi[ss+s]-uee[s-1]*tmp;
|
||||||
|
}
|
||||||
|
// U_m^{-\dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
spProj5p(tmp,chi[ss+s]);
|
||||||
|
chi[ss+Ls-1] = chi[ss+Ls-1] - ueem[s]*tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// L_m^{-\dagger} D^{-dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
spProj5m(tmp,chi[ss+Ls-1]);
|
||||||
|
chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(leem[s]/dee[Ls-1])*tmp;
|
||||||
|
}
|
||||||
|
chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];
|
||||||
|
|
||||||
|
// Apply L^{-dagger}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
spProj5p(tmp,chi[ss+s+1]);
|
||||||
|
chi[ss+s] = chi[ss+s] - lee[s]*tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CAYLEY_DPERP_CACHE
|
||||||
|
INSTANTIATE_DPERP(WilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(WilsonImplD);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(GparityWilsonImplD);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplF);
|
||||||
|
INSTANTIATE_DPERP(ZWilsonImplD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}}
|
133
lib/qcd/action/fermion/CayleyFermion5Ddense.cc
Normal file
133
lib/qcd/action/fermion/CayleyFermion5Ddense.cc
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Ddense.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid/Eigen/Dense>
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
/*
|
||||||
|
* Dense matrix versions of routines
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
assert(Ls==LLs);
|
||||||
|
|
||||||
|
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||||
|
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);
|
||||||
|
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Pplus(s,s) = bee[s];
|
||||||
|
Pminus(s,s)= bee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pminus(s,s+1) = -cee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pplus(s+1,s) = -cee[s+1];
|
||||||
|
}
|
||||||
|
Pplus (0,Ls-1) = mass*cee[0];
|
||||||
|
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
||||||
|
|
||||||
|
Eigen::MatrixXd PplusMat ;
|
||||||
|
Eigen::MatrixXd PminusMat;
|
||||||
|
|
||||||
|
if ( inv ) {
|
||||||
|
PplusMat =Pplus.inverse();
|
||||||
|
PminusMat=Pminus.inverse();
|
||||||
|
} else {
|
||||||
|
PplusMat =Pplus;
|
||||||
|
PminusMat=Pminus;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(dag){
|
||||||
|
PplusMat.adjointInPlace();
|
||||||
|
PminusMat.adjointInPlace();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For the non-vectorised s-direction this is simple
|
||||||
|
|
||||||
|
for(auto site=0;site<vol;site++){
|
||||||
|
|
||||||
|
SiteSpinor SiteChi;
|
||||||
|
SiteHalfSpinor SitePplus;
|
||||||
|
SiteHalfSpinor SitePminus;
|
||||||
|
|
||||||
|
for(int s1=0;s1<Ls;s1++){
|
||||||
|
SiteChi =zero;
|
||||||
|
for(int s2=0;s2<Ls;s2++){
|
||||||
|
int lex2 = s2+Ls*site;
|
||||||
|
|
||||||
|
if ( PplusMat(s1,s2) != 0.0 ) {
|
||||||
|
spProj5p(SitePplus,psi[lex2]);
|
||||||
|
accumRecon5p(SiteChi,PplusMat (s1,s2)*SitePplus);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( PminusMat(s1,s2) != 0.0 ) {
|
||||||
|
spProj5m(SitePminus,psi[lex2]);
|
||||||
|
accumRecon5m(SiteChi,PminusMat(s1,s2)*SitePminus);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chi[s1+Ls*site] = SiteChi*0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
}}
|
149
lib/qcd/action/fermion/CayleyFermion5Dssp.cc
Normal file
149
lib/qcd/action/fermion/CayleyFermion5Dssp.cc
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Dssp.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
|
||||||
|
|
||||||
|
// Pminus forwards
|
||||||
|
// Pplus backwards
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,Ls-1);
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,0);
|
||||||
|
axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
} else {
|
||||||
|
axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pplus(chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
if ( s==0 ) {
|
||||||
|
axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,Ls-1);
|
||||||
|
} else if ( s==(Ls-1)) {
|
||||||
|
axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,0);
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
} else {
|
||||||
|
axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s+1);
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
// Apply (L^{\prime})^{-1}
|
||||||
|
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
||||||
|
for (int s=1;s<Ls;s++){
|
||||||
|
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
|
||||||
|
}
|
||||||
|
// L_m^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
|
||||||
|
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
|
||||||
|
}
|
||||||
|
// U_m^{-1} D^{-1}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
// Chi[s] + 1/d chi[s]
|
||||||
|
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
|
||||||
|
}
|
||||||
|
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
||||||
|
|
||||||
|
// Apply U^{-1}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
int Ls=this->Ls;
|
||||||
|
// Apply (U^{\prime})^{-dagger}
|
||||||
|
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
|
||||||
|
for (int s=1;s<Ls;s++){
|
||||||
|
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
|
||||||
|
}
|
||||||
|
// U_m^{-\dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
|
||||||
|
}
|
||||||
|
// L_m^{-\dagger} D^{-dagger}
|
||||||
|
for (int s=0;s<Ls-1;s++){
|
||||||
|
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
|
||||||
|
}
|
||||||
|
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
|
||||||
|
|
||||||
|
// Apply L^{-dagger}
|
||||||
|
for (int s=Ls-2;s>=0;s--){
|
||||||
|
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef CAYLEY_DPERP_LINALG
|
||||||
|
INSTANTIATE(WilsonImplF);
|
||||||
|
INSTANTIATE(WilsonImplD);
|
||||||
|
INSTANTIATE(GparityWilsonImplF);
|
||||||
|
INSTANTIATE(GparityWilsonImplD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
309
lib/qcd/action/fermion/CayleyFermion5Dvec.cc
Normal file
309
lib/qcd/action/fermion/CayleyFermion5Dvec.cc
Normal file
@ -0,0 +1,309 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./lib/qcd/action/fermion/CayleyFermion5Dvec.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
#include <Grid/Eigen/Dense>
|
||||||
|
#include <Grid.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
/*
|
||||||
|
* Dense matrix versions of routines
|
||||||
|
*/
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
|
||||||
|
{
|
||||||
|
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
|
||||||
|
}
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
int LLs = grid->_rdimensions[0];
|
||||||
|
int nsimd= Simd::Nsimd();
|
||||||
|
|
||||||
|
Vector<iSinglet<Simd> > u(LLs);
|
||||||
|
Vector<iSinglet<Simd> > l(LLs);
|
||||||
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
|
assert(Ls/LLs==nsimd);
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
// just directly address via type pun
|
||||||
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
|
scalar_type * l_p = (scalar_type *)&l[0];
|
||||||
|
scalar_type * d_p = (scalar_type *)&d[0];
|
||||||
|
|
||||||
|
for(int o=0;o<LLs;o++){ // outer
|
||||||
|
for(int i=0;i<nsimd;i++){ //inner
|
||||||
|
int s = o+i*LLs;
|
||||||
|
int ss = o*nsimd+i;
|
||||||
|
u_p[ss] = upper[s];
|
||||||
|
l_p[ss] = lower[s];
|
||||||
|
d_p[ss] = diag[s];
|
||||||
|
}}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
||||||
|
|
||||||
|
alignas(64) SiteHalfSpinor hp;
|
||||||
|
alignas(64) SiteHalfSpinor hm;
|
||||||
|
alignas(64) SiteSpinor fp;
|
||||||
|
alignas(64) SiteSpinor fm;
|
||||||
|
|
||||||
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
|
int vp=(v+1)%LLs;
|
||||||
|
int vm=(v+LLs-1)%LLs;
|
||||||
|
|
||||||
|
spProj5m(hp,psi[ss+vp]);
|
||||||
|
spProj5p(hm,psi[ss+vm]);
|
||||||
|
|
||||||
|
if ( vp<=v ) rotate(hp,hp,1);
|
||||||
|
if ( vm>=v ) rotate(hm,hm,nsimd-1);
|
||||||
|
|
||||||
|
hp=hp*0.5;
|
||||||
|
hm=hm*0.5;
|
||||||
|
spRecon5m(fp,hp);
|
||||||
|
spRecon5p(fm,hm);
|
||||||
|
|
||||||
|
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||||
|
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
|
||||||
|
const FermionField &phi,
|
||||||
|
FermionField &chi,
|
||||||
|
std::vector<Coeff_t> &lower,
|
||||||
|
std::vector<Coeff_t> &diag,
|
||||||
|
std::vector<Coeff_t> &upper)
|
||||||
|
{
|
||||||
|
GridBase *grid=psi._grid;
|
||||||
|
int Ls = this->Ls;
|
||||||
|
int LLs = grid->_rdimensions[0];
|
||||||
|
int nsimd= Simd::Nsimd();
|
||||||
|
|
||||||
|
Vector<iSinglet<Simd> > u(LLs);
|
||||||
|
Vector<iSinglet<Simd> > l(LLs);
|
||||||
|
Vector<iSinglet<Simd> > d(LLs);
|
||||||
|
|
||||||
|
assert(Ls/LLs==nsimd);
|
||||||
|
assert(phi.checkerboard == psi.checkerboard);
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
// just directly address via type pun
|
||||||
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
|
scalar_type * l_p = (scalar_type *)&l[0];
|
||||||
|
scalar_type * d_p = (scalar_type *)&d[0];
|
||||||
|
|
||||||
|
for(int o=0;o<LLs;o++){ // outer
|
||||||
|
for(int i=0;i<nsimd;i++){ //inner
|
||||||
|
int s = o+i*LLs;
|
||||||
|
int ss = o*nsimd+i;
|
||||||
|
u_p[ss] = upper[s];
|
||||||
|
l_p[ss] = lower[s];
|
||||||
|
d_p[ss] = diag[s];
|
||||||
|
}}
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
||||||
|
|
||||||
|
alignas(64) SiteHalfSpinor hp;
|
||||||
|
alignas(64) SiteHalfSpinor hm;
|
||||||
|
alignas(64) SiteSpinor fp;
|
||||||
|
alignas(64) SiteSpinor fm;
|
||||||
|
|
||||||
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
|
int vp=(v+1)%LLs;
|
||||||
|
int vm=(v+LLs-1)%LLs;
|
||||||
|
|
||||||
|
spProj5p(hp,psi[ss+vp]);
|
||||||
|
spProj5m(hm,psi[ss+vm]);
|
||||||
|
|
||||||
|
if ( vp<=v ) rotate(hp,hp,1);
|
||||||
|
if ( vm>=v ) rotate(hm,hm,nsimd-1);
|
||||||
|
|
||||||
|
hp=hp*0.5;
|
||||||
|
hm=hm*0.5;
|
||||||
|
spRecon5p(fp,hp);
|
||||||
|
spRecon5m(fm,hm);
|
||||||
|
|
||||||
|
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
||||||
|
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
|
{
|
||||||
|
int Ls=this->Ls;
|
||||||
|
int LLs = psi._grid->_rdimensions[0];
|
||||||
|
int vol = psi._grid->oSites()/LLs;
|
||||||
|
|
||||||
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
|
||||||
|
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
Pplus(s,s) = bee[s];
|
||||||
|
Pminus(s,s)= bee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pminus(s,s+1) = -cee[s];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int s=0;s<Ls-1;s++){
|
||||||
|
Pplus(s+1,s) = -cee[s+1];
|
||||||
|
}
|
||||||
|
Pplus (0,Ls-1) = mass*cee[0];
|
||||||
|
Pminus(Ls-1,0) = mass*cee[Ls-1];
|
||||||
|
|
||||||
|
Eigen::MatrixXcd PplusMat ;
|
||||||
|
Eigen::MatrixXcd PminusMat;
|
||||||
|
|
||||||
|
if ( inv ) {
|
||||||
|
PplusMat =Pplus.inverse();
|
||||||
|
PminusMat=Pminus.inverse();
|
||||||
|
} else {
|
||||||
|
PplusMat =Pplus;
|
||||||
|
PminusMat=Pminus;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(dag){
|
||||||
|
PplusMat.adjointInPlace();
|
||||||
|
PminusMat.adjointInPlace();
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef typename SiteHalfSpinor::scalar_type scalar_type;
|
||||||
|
const int Nsimd=Simd::Nsimd();
|
||||||
|
Vector<iSinglet<Simd> > Matp(Ls*LLs);
|
||||||
|
Vector<iSinglet<Simd> > Matm(Ls*LLs);
|
||||||
|
|
||||||
|
for(int s2=0;s2<Ls;s2++){
|
||||||
|
for(int s1=0;s1<LLs;s1++){
|
||||||
|
int istride = LLs;
|
||||||
|
int ostride = 1;
|
||||||
|
Simd Vp;
|
||||||
|
Simd Vm;
|
||||||
|
scalar_type *sp = (scalar_type *)&Vp;
|
||||||
|
scalar_type *sm = (scalar_type *)&Vm;
|
||||||
|
for(int l=0;l<Nsimd;l++){
|
||||||
|
sp[l] = PplusMat (l*istride+s1*ostride ,s2);
|
||||||
|
sm[l] = PminusMat(l*istride+s1*ostride,s2);
|
||||||
|
}
|
||||||
|
Matp[LLs*s2+s1] = Vp;
|
||||||
|
Matm[LLs*s2+s1] = Vm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dynamic allocate on stack to get per thread without serialised heap acces
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(auto site=0;site<vol;site++){
|
||||||
|
|
||||||
|
// SiteHalfSpinor *SitePplus =(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
|
||||||
|
// SiteHalfSpinor *SitePminus=(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
|
||||||
|
// SiteSpinor *SiteChi =(SiteSpinor *) alloca(LLs*sizeof(SiteSpinor));
|
||||||
|
|
||||||
|
Vector<SiteHalfSpinor> SitePplus(LLs);
|
||||||
|
Vector<SiteHalfSpinor> SitePminus(LLs);
|
||||||
|
Vector<SiteHalfSpinor> SiteChiP(LLs);
|
||||||
|
Vector<SiteHalfSpinor> SiteChiM(LLs);
|
||||||
|
Vector<SiteSpinor> SiteChi(LLs);
|
||||||
|
|
||||||
|
SiteHalfSpinor BcastP;
|
||||||
|
SiteHalfSpinor BcastM;
|
||||||
|
|
||||||
|
for(int s=0;s<LLs;s++){
|
||||||
|
int lex = s+LLs*site;
|
||||||
|
spProj5p(SitePplus[s] ,psi[lex]);
|
||||||
|
spProj5m(SitePminus[s],psi[lex]);
|
||||||
|
SiteChiP[s]=zero;
|
||||||
|
SiteChiM[s]=zero;
|
||||||
|
}
|
||||||
|
|
||||||
|
int s=0;
|
||||||
|
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
|
||||||
|
for(int s2=0;s2<LLs;s2++){ // Column loop of right hand side
|
||||||
|
vbroadcast(BcastP,SitePplus [s2],l);
|
||||||
|
vbroadcast(BcastM,SitePminus[s2],l);
|
||||||
|
for(int s1=0;s1<LLs;s1++){ // Column loop of reduction variables
|
||||||
|
SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP;
|
||||||
|
SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM;
|
||||||
|
}
|
||||||
|
s++;
|
||||||
|
}}
|
||||||
|
|
||||||
|
for(int s=0;s<LLs;s++){
|
||||||
|
int lex = s+LLs*site;
|
||||||
|
spRecon5p(SiteChi[s],SiteChiP[s]);
|
||||||
|
accumRecon5m(SiteChi[s],SiteChiM[s]);
|
||||||
|
chi[lex] = SiteChi[s]*0.5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_DPERP(DomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_DPERP(DomainWallVec5dImplF);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
|
||||||
|
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
|
||||||
|
|
||||||
|
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
|
||||||
|
|
||||||
|
}}
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
|
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
|
||||||
#define GRID_QCD_DOMAIN_WALL_FERMION_H
|
#define GRID_QCD_DOMAIN_WALL_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -42,6 +42,10 @@ namespace Grid {
|
|||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
// Momentum-space propagator for this action: forwards unchanged to
// MomentumSpacePropagatorHt.
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m)
{
  this->MomentumSpacePropagatorHt(out,in,_m);
};
|
||||||
|
|
||||||
virtual void Instantiatable(void) {};
|
virtual void Instantiatable(void) {};
|
||||||
// Constructors
|
// Constructors
|
||||||
DomainWallFermion(GaugeField &_Umu,
|
DomainWallFermion(GaugeField &_Umu,
|
||||||
@ -51,6 +55,7 @@ namespace Grid {
|
|||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
|
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) :
|
||||||
|
|
||||||
|
|
||||||
CayleyFermion5D<Impl>(_Umu,
|
CayleyFermion5D<Impl>(_Umu,
|
||||||
FiveDimGrid,
|
FiveDimGrid,
|
||||||
FiveDimRedBlackGrid,
|
FiveDimRedBlackGrid,
|
||||||
|
@ -91,6 +91,20 @@ namespace Grid {
|
|||||||
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's
|
||||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||||
|
|
||||||
|
|
||||||
|
// Default momentum-space propagator: not implemented at this level, so it
// trips assert(0). Operators that support it are expected to override.
// NOTE(review): when NDEBUG is defined the assert compiles away and this
// silently leaves `out` untouched -- confirm that is acceptable.
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) { assert(0);};
|
||||||
|
|
||||||
|
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
|
||||||
|
FFT theFFT((GridCartesian *) in._grid);
|
||||||
|
|
||||||
|
FermionField in_k(in._grid);
|
||||||
|
FermionField prop_k(in._grid);
|
||||||
|
|
||||||
|
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
||||||
|
this->MomentumSpacePropagator(prop_k,in_k,mass);
|
||||||
|
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||||
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Updates gauge field during HMC
|
// Updates gauge field during HMC
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
|
@ -1,490 +1,532 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
|
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||||
|
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
namespace QCD {
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// Template parameter class constructs to package
|
// Template parameter class constructs to package
|
||||||
// externally control Fermion implementations
|
// externally control Fermion implementations
|
||||||
// in orthogonal directions
|
// in orthogonal directions
|
||||||
//
|
//
|
||||||
// Ultimately need Impl to always define types where XXX is opaque
|
// Ultimately need Impl to always define types where XXX is opaque
|
||||||
//
|
//
|
||||||
// typedef typename XXX Simd;
|
// typedef typename XXX Simd;
|
||||||
// typedef typename XXX GaugeLinkField;
|
// typedef typename XXX GaugeLinkField;
|
||||||
// typedef typename XXX GaugeField;
|
// typedef typename XXX GaugeField;
|
||||||
// typedef typename XXX GaugeActField;
|
// typedef typename XXX GaugeActField;
|
||||||
// typedef typename XXX FermionField;
|
// typedef typename XXX FermionField;
|
||||||
// typedef typename XXX DoubledGaugeField;
|
// typedef typename XXX DoubledGaugeField;
|
||||||
// typedef typename XXX SiteSpinor;
|
// typedef typename XXX SiteSpinor;
|
||||||
// typedef typename XXX SiteHalfSpinor;
|
// typedef typename XXX SiteHalfSpinor;
|
||||||
// typedef typename XXX Compressor;
|
// typedef typename XXX Compressor;
|
||||||
//
|
//
|
||||||
// and Methods:
|
// and Methods:
|
||||||
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
||||||
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||||
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// To acquire the typedefs from "Base" (either a base class or template param) use:
|
// To acquire the typedefs from "Base" (either a base class or template param) use:
|
||||||
//
|
//
|
||||||
// INHERIT_GIMPL_TYPES(Base)
|
// INHERIT_GIMPL_TYPES(Base)
|
||||||
// INHERIT_FIMPL_TYPES(Base)
|
// INHERIT_FIMPL_TYPES(Base)
|
||||||
// INHERIT_IMPL_TYPES(Base)
|
// INHERIT_IMPL_TYPES(Base)
|
||||||
//
|
//
|
||||||
// The Fermion operators will do the following:
|
// The Fermion operators will do the following:
|
||||||
//
|
//
|
||||||
// struct MyOpParams {
|
// struct MyOpParams {
|
||||||
// RealD mass;
|
// RealD mass;
|
||||||
// };
|
// };
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// template<class Impl>
|
// template<class Impl>
|
||||||
// class MyOp : pubic<Impl> {
|
// class MyOp : public<Impl> {
|
||||||
// public:
|
// public:
|
||||||
//
|
//
|
||||||
// INHERIT_ALL_IMPL_TYPES(Impl);
|
// INHERIT_ALL_IMPL_TYPES(Impl);
|
||||||
//
|
//
|
||||||
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
|
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
|
||||||
// {
|
// {
|
||||||
//
|
//
|
||||||
// };
|
// };
|
||||||
//
|
//
|
||||||
// }
|
// }
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Implementation dependent fermion types
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Implementation dependent fermion types
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define INHERIT_FIMPL_TYPES(Impl)\
|
#define INHERIT_FIMPL_TYPES(Impl)\
|
||||||
typedef typename Impl::FermionField FermionField; \
|
typedef typename Impl::FermionField FermionField; \
|
||||||
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||||
typedef typename Impl::Compressor Compressor; \
|
typedef typename Impl::Compressor Compressor; \
|
||||||
typedef typename Impl::StencilImpl StencilImpl; \
|
typedef typename Impl::StencilImpl StencilImpl; \
|
||||||
typedef typename Impl::ImplParams ImplParams;
|
typedef typename Impl::ImplParams ImplParams; \
|
||||||
|
typedef typename Impl::Coeff_t Coeff_t;
|
||||||
|
|
||||||
#define INHERIT_IMPL_TYPES(Base) \
|
#define INHERIT_IMPL_TYPES(Base) \
|
||||||
INHERIT_GIMPL_TYPES(Base)\
|
INHERIT_GIMPL_TYPES(Base) \
|
||||||
INHERIT_FIMPL_TYPES(Base)
|
INHERIT_FIMPL_TYPES(Base)
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Single flavour four spinors with colour index
|
||||||
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
|
||||||
|
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||||
|
|
||||||
///////
|
|
||||||
// Single flavour four spinors with colour index
|
|
||||||
///////
|
|
||||||
template<class S,int Nrepresentation=Nc>
|
|
||||||
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
static const int Dimension = Representation::Dimension;
|
||||||
|
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
|
|
||||||
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
|
||||||
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
|
||||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
|
|
||||||
|
|
||||||
typedef iImplSpinor <Simd> SiteSpinor;
|
|
||||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
|
||||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
|
||||||
|
|
||||||
typedef Lattice<SiteSpinor> FermionField;
|
|
||||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
|
||||||
|
|
||||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
|
||||||
typedef WilsonImplParams ImplParams;
|
|
||||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
|
||||||
|
|
||||||
ImplParams Params;
|
|
||||||
|
|
||||||
WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
|
||||||
|
|
||||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
|
|
||||||
mult(&phi(),&U(mu),&chi());
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class ref>
|
|
||||||
inline void loadLinkElement(Simd & reg,ref &memory){
|
|
||||||
reg = memory;
|
|
||||||
}
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
|
||||||
{
|
|
||||||
conformable(Uds._grid,GaugeGrid);
|
|
||||||
conformable(Umu._grid,GaugeGrid);
|
|
||||||
GaugeLinkField U(GaugeGrid);
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
PokeIndex<LorentzIndex>(Uds,U,mu);
|
|
||||||
U = adj(Cshift(U,mu,-1));
|
|
||||||
PokeIndex<LorentzIndex>(Uds,U,mu+4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
//Necessary?
|
||||||
GaugeLinkField link(mat._grid);
|
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||||
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
|
||||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
const bool LsVectorised=false;
|
||||||
}
|
typedef _Coeff_t Coeff_t;
|
||||||
|
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
|
||||||
int Ls=Btilde._grid->_fdimensions[0];
|
|
||||||
|
|
||||||
GaugeLinkField tmp(mat._grid);
|
|
||||||
tmp = zero;
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
|
||||||
int sU=sss;
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
int sF = s+Ls*sU;
|
|
||||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
///////
|
|
||||||
// Single flavour four spinors with colour index, 5d redblack
|
|
||||||
///////
|
|
||||||
template<class S,int Nrepresentation=Nc>
|
|
||||||
class DomainWallRedBlack5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
|
||||||
public:
|
|
||||||
|
|
||||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
|
|
||||||
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
|
||||||
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
|
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||||
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
|
|
||||||
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
|
||||||
|
|
||||||
typedef iImplSpinor <Simd> SiteSpinor;
|
typedef iImplSpinor<Simd> SiteSpinor;
|
||||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
typedef Lattice<SiteSpinor> FermionField;
|
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||||
|
|
||||||
// Make the doubled gauge field a *scalar*
|
|
||||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
|
||||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
|
||||||
typedef iImplGaugeLink <typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
|
||||||
|
|
||||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
|
||||||
|
|
||||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
|
||||||
typedef WilsonImplParams ImplParams;
|
|
||||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
|
||||||
|
|
||||||
ImplParams Params;
|
|
||||||
|
|
||||||
DomainWallRedBlack5dImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return false; };
|
|
||||||
|
|
||||||
template<class ref>
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
inline void loadLinkElement(Simd & reg,ref &memory){
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
vsplat(reg,memory);
|
|
||||||
|
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||||
|
typedef WilsonImplParams ImplParams;
|
||||||
|
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||||
|
|
||||||
|
ImplParams Params;
|
||||||
|
|
||||||
|
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
|
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||||
|
|
||||||
|
inline void multLink(SiteHalfSpinor &phi,
|
||||||
|
const SiteDoubledGaugeField &U,
|
||||||
|
const SiteHalfSpinor &chi,
|
||||||
|
int mu,
|
||||||
|
StencilEntry *SE,
|
||||||
|
StencilImpl &St) {
|
||||||
|
mult(&phi(), &U(mu), &chi());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class ref>
|
||||||
|
inline void loadLinkElement(Simd &reg, ref &memory) {
|
||||||
|
reg = memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void DoubleStore(GridBase *GaugeGrid,
|
||||||
|
DoubledGaugeField &Uds,
|
||||||
|
const GaugeField &Umu) {
|
||||||
|
conformable(Uds._grid, GaugeGrid);
|
||||||
|
conformable(Umu._grid, GaugeGrid);
|
||||||
|
GaugeLinkField U(GaugeGrid);
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
|
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||||
|
U = adj(Cshift(U, mu, -1));
|
||||||
|
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||||
}
|
}
|
||||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
}
|
||||||
{
|
|
||||||
SiteGaugeLink UU;
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||||
for(int i=0;i<Nrepresentation;i++){
|
GaugeLinkField link(mat._grid);
|
||||||
for(int j=0;j<Nrepresentation;j++){
|
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||||
vsplat(UU()()(i,j),U(mu)()(i,j));
|
PokeIndex<LorentzIndex>(mat,link,mu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
||||||
|
|
||||||
|
int Ls=Btilde._grid->_fdimensions[0];
|
||||||
|
GaugeLinkField tmp(mat._grid);
|
||||||
|
tmp = zero;
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
||||||
|
int sU=sss;
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
int sF = s+Ls*sU;
|
||||||
|
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||||
}
|
}
|
||||||
mult(&phi(),&UU(),&chi());
|
|
||||||
}
|
}
|
||||||
|
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
{
|
// Single flavour four spinors with colour index, 5d redblack
|
||||||
SiteScalarGaugeField ScalarUmu;
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
SiteDoubledGaugeField ScalarUds;
|
|
||||||
|
|
||||||
GaugeLinkField U (Umu._grid);
|
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||||
GaugeField Uadj(Umu._grid);
|
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||||
for(int mu=0;mu<Nd;mu++){
|
public:
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
U = adj(Cshift(U,mu,-1));
|
static const int Dimension = Nrepresentation;
|
||||||
PokeIndex<LorentzIndex>(Uadj,U,mu);
|
const bool LsVectorised=true;
|
||||||
}
|
typedef _Coeff_t Coeff_t;
|
||||||
|
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
|
||||||
for(int lidx=0;lidx<GaugeGrid->lSites();lidx++){
|
|
||||||
std::vector<int> lcoor;
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor);
|
|
||||||
|
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||||
peekLocalSite(ScalarUmu,Umu,lcoor);
|
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||||
for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu);
|
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
|
||||||
|
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
|
||||||
peekLocalSite(ScalarUmu,Uadj,lcoor);
|
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||||
for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu);
|
|
||||||
|
typedef iImplSpinor<Simd> SiteSpinor;
|
||||||
pokeLocalSite(ScalarUds,Uds,lcoor);
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
}
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
|
|
||||||
|
// Make the doubled gauge field a *scalar*
|
||||||
|
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
||||||
|
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
||||||
|
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
||||||
|
|
||||||
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
|
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||||
|
typedef WilsonImplParams ImplParams;
|
||||||
|
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||||
|
|
||||||
|
ImplParams Params;
|
||||||
|
|
||||||
|
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
|
bool overlapCommsCompute(void) { return false; };
|
||||||
|
|
||||||
|
template <class ref>
|
||||||
|
inline void loadLinkElement(Simd &reg, ref &memory) {
|
||||||
|
vsplat(reg, memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||||
|
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||||
|
StencilImpl &St) {
|
||||||
|
SiteGaugeLink UU;
|
||||||
|
for (int i = 0; i < Nrepresentation; i++) {
|
||||||
|
for (int j = 0; j < Nrepresentation; j++) {
|
||||||
|
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
mult(&phi(), &UU(), &chi());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
|
{
|
||||||
|
SiteScalarGaugeField ScalarUmu;
|
||||||
|
SiteDoubledGaugeField ScalarUds;
|
||||||
|
|
||||||
|
GaugeLinkField U(Umu._grid);
|
||||||
|
GaugeField Uadj(Umu._grid);
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||||
|
U = adj(Cshift(U, mu, -1));
|
||||||
|
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||||
|
std::vector<int> lcoor;
|
||||||
|
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||||
|
|
||||||
|
peekLocalSite(ScalarUmu, Umu, lcoor);
|
||||||
|
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||||
|
|
||||||
|
peekLocalSite(ScalarUmu, Uadj, lcoor);
|
||||||
|
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||||
|
|
||||||
|
pokeLocalSite(ScalarUds, Uds, lcoor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
|
||||||
|
{
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField &Atilde, int mu)
|
||||||
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template<class S,int Nrepresentation>
|
|
||||||
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
|
|
||||||
public:
|
|
||||||
|
|
||||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
|
||||||
|
|
||||||
template<typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >;
|
|
||||||
template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
|
|
||||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
|
|
||||||
|
|
||||||
typedef iImplSpinor <Simd> SiteSpinor;
|
template <class S, int Nrepresentation,class _Coeff_t = RealD>
|
||||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
public:
|
||||||
|
|
||||||
typedef Lattice<SiteSpinor> FermionField;
|
static const int Dimension = Nrepresentation;
|
||||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
|
||||||
|
|
||||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
const bool LsVectorised=false;
|
||||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
|
||||||
|
|
||||||
typedef GparityWilsonImplParams ImplParams;
|
typedef _Coeff_t Coeff_t;
|
||||||
|
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
||||||
ImplParams Params;
|
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
|
||||||
|
|
||||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||||
|
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
|
||||||
// provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity
|
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
|
||||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
|
|
||||||
|
|
||||||
typedef SiteHalfSpinor vobj;
|
|
||||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
|
||||||
|
|
||||||
vobj vtmp;
|
|
||||||
sobj stmp;
|
|
||||||
|
|
||||||
GridBase *grid = St._grid;
|
|
||||||
|
|
||||||
const int Nsimd = grid->Nsimd();
|
typedef iImplSpinor<Simd> SiteSpinor;
|
||||||
|
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||||
int direction = St._directions[mu];
|
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||||
int distance = St._distances[mu];
|
|
||||||
int ptype = St._permute_type[mu];
|
typedef Lattice<SiteSpinor> FermionField;
|
||||||
int sl = St._grid->_simd_layout[direction];
|
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||||
|
|
||||||
// Fixme X.Y.Z.T hardcode in stencil
|
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||||
int mmu = mu % Nd;
|
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||||
|
|
||||||
// assert our assumptions
|
typedef GparityWilsonImplParams ImplParams;
|
||||||
assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code
|
|
||||||
assert((sl==1)||(sl==2));
|
|
||||||
|
|
||||||
std::vector<int> icoor;
|
|
||||||
|
|
||||||
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
ImplParams Params;
|
||||||
|
|
||||||
if ( sl == 2 ) {
|
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||||
|
|
||||||
std::vector<sobj> vals(Nsimd);
|
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||||
|
|
||||||
extract(chi,vals);
|
// provide the multiply by link that is differentiated between Gparity (with
|
||||||
for(int s=0;s<Nsimd;s++){
|
// flavour index) and non-Gparity
|
||||||
|
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||||
|
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||||
|
StencilImpl &St) {
|
||||||
|
|
||||||
grid->iCoorFromIindex(icoor,s);
|
typedef SiteHalfSpinor vobj;
|
||||||
|
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||||
|
|
||||||
|
vobj vtmp;
|
||||||
|
sobj stmp;
|
||||||
|
|
||||||
|
GridBase *grid = St._grid;
|
||||||
|
|
||||||
|
const int Nsimd = grid->Nsimd();
|
||||||
|
|
||||||
|
int direction = St._directions[mu];
|
||||||
|
int distance = St._distances[mu];
|
||||||
|
int ptype = St._permute_type[mu];
|
||||||
|
int sl = St._grid->_simd_layout[direction];
|
||||||
|
|
||||||
|
// Fixme X.Y.Z.T hardcode in stencil
|
||||||
|
int mmu = mu % Nd;
|
||||||
|
|
||||||
|
// assert our assumptions
|
||||||
|
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
|
||||||
|
assert((sl == 1) || (sl == 2));
|
||||||
|
|
||||||
|
std::vector<int> icoor;
|
||||||
|
|
||||||
|
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
||||||
|
|
||||||
|
if ( sl == 2 ) {
|
||||||
|
|
||||||
|
std::vector<sobj> vals(Nsimd);
|
||||||
|
|
||||||
|
extract(chi,vals);
|
||||||
|
for(int s=0;s<Nsimd;s++){
|
||||||
|
|
||||||
|
grid->iCoorFromIindex(icoor,s);
|
||||||
|
|
||||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||||
|
|
||||||
int permute_lane;
|
int permute_lane;
|
||||||
if ( distance == 1) {
|
if ( distance == 1) {
|
||||||
permute_lane = icoor[direction]?1:0;
|
permute_lane = icoor[direction]?1:0;
|
||||||
} else {
|
} else {
|
||||||
permute_lane = icoor[direction]?0:1;
|
permute_lane = icoor[direction]?0:1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( permute_lane ) {
|
||||||
|
stmp(0) = vals[s](1);
|
||||||
|
stmp(1) = vals[s](0);
|
||||||
|
vals[s] = stmp;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if ( permute_lane ) {
|
merge(vtmp,vals);
|
||||||
stmp(0) = vals[s](1);
|
|
||||||
stmp(1) = vals[s](0);
|
} else {
|
||||||
vals[s] = stmp;
|
vtmp(0) = chi(1);
|
||||||
}
|
vtmp(1) = chi(0);
|
||||||
}
|
}
|
||||||
merge(vtmp,vals);
|
mult(&phi(0),&U(0)(mu),&vtmp(0));
|
||||||
|
mult(&phi(1),&U(1)(mu),&vtmp(1));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
mult(&phi(0),&U(0)(mu),&chi(0));
|
||||||
|
mult(&phi(1),&U(1)(mu),&chi(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||||
vtmp(0) = chi(1);
|
{
|
||||||
vtmp(1) = chi(0);
|
conformable(Uds._grid,GaugeGrid);
|
||||||
}
|
conformable(Umu._grid,GaugeGrid);
|
||||||
mult(&phi(0),&U(0)(mu),&vtmp(0));
|
|
||||||
mult(&phi(1),&U(1)(mu),&vtmp(1));
|
GaugeLinkField Utmp (GaugeGrid);
|
||||||
|
GaugeLinkField U (GaugeGrid);
|
||||||
|
GaugeLinkField Uconj(GaugeGrid);
|
||||||
|
|
||||||
|
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
|
||||||
} else {
|
LatticeCoordinate(coor,mu);
|
||||||
mult(&phi(0),&U(0)(mu),&chi(0));
|
|
||||||
mult(&phi(1),&U(1)(mu),&chi(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
|
||||||
{
|
|
||||||
|
|
||||||
conformable(Uds._grid,GaugeGrid);
|
|
||||||
conformable(Umu._grid,GaugeGrid);
|
|
||||||
|
|
||||||
GaugeLinkField Utmp (GaugeGrid);
|
|
||||||
GaugeLinkField U (GaugeGrid);
|
|
||||||
GaugeLinkField Uconj(GaugeGrid);
|
|
||||||
|
|
||||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
|
||||||
|
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
|
|
||||||
LatticeCoordinate(coor,mu);
|
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||||
|
Uconj = conjugate(U);
|
||||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
|
||||||
Uconj = conjugate(U);
|
// This phase could come from a simple bc 1,1,-1,1 ..
|
||||||
|
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
||||||
// This phase could come from a simple bc 1,1,-1,1 ..
|
if ( Params.twists[mu] ) {
|
||||||
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||||
if ( Params.twists[mu] ) {
|
}
|
||||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=U.begin();ss<U.end();ss++){
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
Uds[ss](0)(mu) = U[ss]();
|
Uds[ss](0)(mu) = U[ss]();
|
||||||
Uds[ss](1)(mu) = Uconj[ss]();
|
Uds[ss](1)(mu) = Uconj[ss]();
|
||||||
}
|
}
|
||||||
|
|
||||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||||
Uconj = adj(Cshift(Uconj,mu,-1));
|
Uconj = adj(Cshift(Uconj,mu,-1));
|
||||||
|
|
||||||
Utmp = U;
|
Utmp = U;
|
||||||
if ( Params.twists[mu] ) {
|
if ( Params.twists[mu] ) {
|
||||||
Utmp = where(coor==0,Uconj,Utmp);
|
Utmp = where(coor==0,Uconj,Utmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=U.begin();ss<U.end();ss++){
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||||
}
|
}
|
||||||
|
|
||||||
Utmp = Uconj;
|
Utmp = Uconj;
|
||||||
if ( Params.twists[mu] ) {
|
if ( Params.twists[mu] ) {
|
||||||
Utmp = where(coor==0,U,Utmp);
|
Utmp = where(coor==0,U,Utmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=U.begin();ss<U.end();ss++){
|
for(auto ss=U.begin();ss<U.end();ss++){
|
||||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
|
||||||
|
|
||||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
// DhopDir provides U or Uconj depending on coor/flavour.
|
||||||
|
GaugeLinkField link(mat._grid);
|
||||||
|
// use lorentz for flavour as hack.
|
||||||
|
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
|
||||||
|
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
|
||||||
|
}
|
||||||
|
PokeIndex<LorentzIndex>(mat, link, mu);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
|
||||||
|
|
||||||
|
int Ls = Btilde._grid->_fdimensions[0];
|
||||||
|
|
||||||
// DhopDir provides U or Uconj depending on coor/flavour.
|
GaugeLinkField tmp(mat._grid);
|
||||||
GaugeLinkField link(mat._grid);
|
tmp = zero;
|
||||||
// use lorentz for flavour as hack.
|
|
||||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(auto ss=tmp.begin();ss<tmp.end();ss++){
|
for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
|
||||||
link[ss]() = tmp[ss](0,0) - conjugate(tmp[ss](1,1)) ;
|
for (int s = 0; s < Ls; s++) {
|
||||||
}
|
int sF = s + Ls * ss;
|
||||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
|
||||||
return;
|
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||||
}
|
}
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
|
}
|
||||||
|
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int Ls=Btilde._grid->_fdimensions[0];
|
};
|
||||||
|
|
||||||
GaugeLinkField tmp(mat._grid);
|
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
|
||||||
tmp = zero;
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
|
||||||
PARALLEL_FOR_LOOP
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
|
||||||
for(int ss=0;ss<tmp._grid->oSites();ss++){
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
int sF = s+Ls*ss;
|
|
||||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF]));
|
|
||||||
tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec
|
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
|
||||||
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
|
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
|
||||||
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
|
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
|
||||||
|
|
||||||
|
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
|
||||||
|
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
|
||||||
|
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
|
||||||
|
|
||||||
|
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
|
||||||
|
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
|
||||||
|
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
|
||||||
|
|
||||||
|
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||||
|
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||||
|
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
|
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||||
|
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
|
||||||
|
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
|
||||||
|
|
||||||
|
typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||||
|
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
|
||||||
|
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
|
||||||
|
|
||||||
typedef DomainWallRedBlack5dImpl<vComplex ,Nc> DomainWallRedBlack5dImplR; // Real.. whichever prec
|
}}
|
||||||
typedef DomainWallRedBlack5dImpl<vComplexF,Nc> DomainWallRedBlack5dImplF; // Float
|
|
||||||
typedef DomainWallRedBlack5dImpl<vComplexD,Nc> DomainWallRedBlack5dImplD; // Double
|
|
||||||
|
|
||||||
typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
|
|
||||||
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
|
|
||||||
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_MOBIUS_FERMION_H
|
#ifndef GRID_QCD_MOBIUS_FERMION_H
|
||||||
#define GRID_QCD_MOBIUS_FERMION_H
|
#define GRID_QCD_MOBIUS_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
||||||
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
||||||
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
@ -42,7 +42,11 @@ namespace Grid {
|
|||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Constructors
|
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) {
|
||||||
|
this->MomentumSpacePropagatorHw(out,in,_m);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Constructors
|
||||||
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
|
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
||||||
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
||||||
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
||||||
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
||||||
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
||||||
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
|
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
|
||||||
#define GRID_QCD_SCALED_SHAMIR_FERMION_H
|
#define GRID_QCD_SCALED_SHAMIR_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
||||||
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
|
||||||
|
|
||||||
#include <Grid.h>
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
|
@ -1,130 +1,129 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
|
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
const std::vector<int> WilsonFermionStatic::directions ({0,1,2,3, 0, 1, 2, 3});
|
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2,
|
||||||
const std::vector<int> WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1});
|
3});
|
||||||
int WilsonFermionStatic::HandOptDslash;
|
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1,
|
||||||
|
-1, -1});
|
||||||
|
int WilsonFermionStatic::HandOptDslash;
|
||||||
|
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
// Constructor and gauge import
|
// Constructor and gauge import
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu,
|
WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||||
GridCartesian &Fgrid,
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
GridRedBlackCartesian &Hgrid,
|
const ImplParams &p)
|
||||||
RealD _mass,const ImplParams &p) :
|
: Kernels(p),
|
||||||
Kernels(p),
|
_grid(&Fgrid),
|
||||||
_grid(&Fgrid),
|
_cbgrid(&Hgrid),
|
||||||
_cbgrid(&Hgrid),
|
Stencil(&Fgrid, npoint, Even, directions, displacements),
|
||||||
Stencil (&Fgrid,npoint,Even,directions,displacements),
|
StencilEven(&Hgrid, npoint, Even, directions,
|
||||||
StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even
|
displacements), // source is Even
|
||||||
StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd
|
StencilOdd(&Hgrid, npoint, Odd, directions,
|
||||||
mass(_mass),
|
displacements), // source is Odd
|
||||||
Lebesgue(_grid),
|
mass(_mass),
|
||||||
LebesgueEvenOdd(_cbgrid),
|
Lebesgue(_grid),
|
||||||
Umu(&Fgrid),
|
LebesgueEvenOdd(_cbgrid),
|
||||||
UmuEven(&Hgrid),
|
Umu(&Fgrid),
|
||||||
UmuOdd (&Hgrid)
|
UmuEven(&Hgrid),
|
||||||
{
|
UmuOdd(&Hgrid) {
|
||||||
// Allocate the required comms buffer
|
// Allocate the required comms buffer
|
||||||
ImportGauge(_Umu);
|
ImportGauge(_Umu);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
|
||||||
|
GaugeField HUmu(_Umu._grid);
|
||||||
|
HUmu = _Umu * (-0.5);
|
||||||
|
Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
|
||||||
|
pickCheckerboard(Even, UmuEven, Umu);
|
||||||
|
pickCheckerboard(Odd, UmuOdd, Umu);
|
||||||
|
}
|
||||||
|
|
||||||
|
/////////////////////////////
|
||||||
|
// Implement the interface
|
||||||
|
/////////////////////////////
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
Dhop(in, out, DaggerNo);
|
||||||
|
return axpy_norm(out, 4 + mass, in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
Dhop(in, out, DaggerYes);
|
||||||
|
return axpy_norm(out, 4 + mass, in, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
|
||||||
|
if (in.checkerboard == Odd) {
|
||||||
|
DhopEO(in, out, DaggerNo);
|
||||||
|
} else {
|
||||||
|
DhopOE(in, out, DaggerNo);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
|
||||||
{
|
if (in.checkerboard == Odd) {
|
||||||
GaugeField HUmu(_Umu._grid);
|
DhopEO(in, out, DaggerYes);
|
||||||
HUmu = _Umu*(-0.5);
|
} else {
|
||||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
DhopOE(in, out, DaggerYes);
|
||||||
pickCheckerboard(Even,UmuEven,Umu);
|
|
||||||
pickCheckerboard(Odd ,UmuOdd,Umu);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/////////////////////////////
|
template <class Impl>
|
||||||
// Implement the interface
|
|
||||||
/////////////////////////////
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
out.checkerboard=in.checkerboard;
|
|
||||||
Dhop(in,out,DaggerNo);
|
|
||||||
return axpy_norm(out,4+mass,in,out);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
out.checkerboard=in.checkerboard;
|
|
||||||
Dhop(in,out,DaggerYes);
|
|
||||||
return axpy_norm(out,4+mass,in,out);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
if ( in.checkerboard == Odd ) {
|
|
||||||
DhopEO(in,out,DaggerNo);
|
|
||||||
} else {
|
|
||||||
DhopOE(in,out,DaggerNo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
if ( in.checkerboard == Odd ) {
|
|
||||||
DhopEO(in,out,DaggerYes);
|
|
||||||
} else {
|
|
||||||
DhopOE(in,out,DaggerYes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.checkerboard = in.checkerboard;
|
||||||
typename FermionField::scalar_type scal(4.0+mass);
|
typename FermionField::scalar_type scal(4.0 + mass);
|
||||||
out = scal*in;
|
out = scal * in;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.checkerboard = in.checkerboard;
|
||||||
Mooee(in,out);
|
Mooee(in, out);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
|
||||||
out.checkerboard = in.checkerboard;
|
out.checkerboard = in.checkerboard;
|
||||||
@ -136,184 +135,237 @@ namespace QCD {
|
|||||||
out.checkerboard = in.checkerboard;
|
out.checkerboard = in.checkerboard;
|
||||||
MooeeInv(in,out);
|
MooeeInv(in,out);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////
|
|
||||||
// Internal
|
|
||||||
///////////////////////////////////
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion<Impl>::DerivInternal(StencilImpl & st,
|
void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m) {
|
||||||
DoubledGaugeField & U,
|
|
||||||
GaugeField &mat,
|
|
||||||
const FermionField &A,
|
|
||||||
const FermionField &B,int dag) {
|
|
||||||
|
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
||||||
|
|
||||||
Compressor compressor(dag);
|
|
||||||
|
|
||||||
FermionField Btilde(B._grid);
|
|
||||||
FermionField Atilde(B._grid);
|
|
||||||
Atilde = A;
|
|
||||||
|
|
||||||
st.HaloExchange(B,compressor);
|
// what type LatticeComplex
|
||||||
|
conformable(_grid,out._grid);
|
||||||
for(int mu=0;mu<Nd;mu++){
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
// Flip gamma (1+g)<->(1-g) if dag
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
int gamma = mu;
|
|
||||||
if ( !dag ) gamma+= Nd;
|
|
||||||
|
|
||||||
////////////////////////
|
|
||||||
// Call the single hop
|
|
||||||
////////////////////////
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<B._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// spin trace outer product
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
Impl::InsertForce4D(mat,Btilde,Atilde,mu);
|
|
||||||
|
|
||||||
|
typedef typename FermionField::vector_type vector_type;
|
||||||
|
typedef typename FermionField::scalar_type ScalComplex;
|
||||||
|
|
||||||
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||||
|
|
||||||
|
Gamma::GammaMatrix Gmu [] = {
|
||||||
|
Gamma::GammaX,
|
||||||
|
Gamma::GammaY,
|
||||||
|
Gamma::GammaZ,
|
||||||
|
Gamma::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<int> latt_size = _grid->_fdimensions;
|
||||||
|
|
||||||
|
FermionField num (_grid); num = zero;
|
||||||
|
LatComplex wilson(_grid); wilson= zero;
|
||||||
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||||
|
|
||||||
|
LatComplex denom(_grid); denom= zero;
|
||||||
|
LatComplex kmu(_grid);
|
||||||
|
ScalComplex ci(0.0,1.0);
|
||||||
|
// momphase = n * 2pi / L
|
||||||
|
for(int mu=0;mu<Nd;mu++) {
|
||||||
|
|
||||||
|
LatticeCoordinate(kmu,mu);
|
||||||
|
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
|
||||||
|
kmu = TwoPiL * kmu;
|
||||||
|
|
||||||
|
wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
||||||
|
|
||||||
|
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in); // derivative term
|
||||||
|
|
||||||
|
denom=denom + sin(kmu)*sin(kmu);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
conformable(U._grid,_grid);
|
|
||||||
conformable(U._grid,V._grid);
|
|
||||||
conformable(U._grid,mat._grid);
|
|
||||||
|
|
||||||
mat.checkerboard = U.checkerboard;
|
|
||||||
|
|
||||||
DerivInternal(Stencil,Umu,mat,U,V,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
conformable(U._grid,_cbgrid);
|
|
||||||
conformable(U._grid,V._grid);
|
|
||||||
conformable(U._grid,mat._grid);
|
|
||||||
|
|
||||||
assert(V.checkerboard==Even);
|
|
||||||
assert(U.checkerboard==Odd);
|
|
||||||
mat.checkerboard = Odd;
|
|
||||||
|
|
||||||
DerivInternal(StencilEven,UmuOdd,mat,U,V,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
|
|
||||||
{
|
|
||||||
conformable(U._grid,_cbgrid);
|
|
||||||
conformable(U._grid,V._grid);
|
|
||||||
conformable(U._grid,mat._grid);
|
|
||||||
|
|
||||||
assert(V.checkerboard==Odd);
|
|
||||||
assert(U.checkerboard==Even);
|
|
||||||
mat.checkerboard = Even;
|
|
||||||
|
|
||||||
DerivInternal(StencilOdd,UmuEven,mat,U,V,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
wilson = wilson + _m; // 2 sin^2 k/2 + m
|
||||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) {
|
|
||||||
conformable(in._grid,_grid); // verifies full grid
|
|
||||||
conformable(in._grid,out._grid);
|
|
||||||
|
|
||||||
out.checkerboard = in.checkerboard;
|
|
||||||
|
|
||||||
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) {
|
|
||||||
conformable(in._grid,_cbgrid); // verifies half grid
|
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
|
||||||
|
|
||||||
assert(in.checkerboard==Even);
|
|
||||||
out.checkerboard = Odd;
|
|
||||||
|
|
||||||
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
|
|
||||||
conformable(in._grid,_cbgrid); // verifies half grid
|
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
|
||||||
|
|
||||||
assert(in.checkerboard==Odd);
|
|
||||||
out.checkerboard = Even;
|
|
||||||
|
|
||||||
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::Mdir (const FermionField &in, FermionField &out,int dir,int disp) {
|
|
||||||
DhopDir(in,out,dir,disp);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
num = num + wilson*in; // -i gmu sin k + 2 sin^2 k/2 + m
|
||||||
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir,int disp){
|
|
||||||
|
|
||||||
int skip = (disp==1) ? 0 : 1;
|
|
||||||
int dirdisp = dir+skip*4;
|
|
||||||
int gamma = dir+(1-skip)*4;
|
|
||||||
|
|
||||||
DhopDirDisp(in,out,dirdisp,gamma,DaggerNo);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) {
|
|
||||||
|
|
||||||
Compressor compressor(dag);
|
|
||||||
|
|
||||||
Stencil.HaloExchange(in,compressor);
|
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sss,sss,in,out,dirdisp,gamma);
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Impl>
|
denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2
|
||||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,LebesgueOrder& lo,DoubledGaugeField & U,
|
|
||||||
const FermionField &in, FermionField &out,int dag)
|
|
||||||
{
|
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
|
||||||
|
|
||||||
Compressor compressor(dag);
|
denom= one/denom;
|
||||||
st.HaloExchange(in,compressor);
|
|
||||||
|
|
||||||
if ( dag == DaggerYes ) {
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sss,sss,1,1,in,out);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
PARALLEL_FOR_LOOP
|
|
||||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
|
||||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sss,sss,1,1,in,out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonFermion);
|
|
||||||
GparityFermOpTemplateInstantiate(WilsonFermion);
|
|
||||||
|
|
||||||
|
///////////////////////////////////
|
||||||
|
// Internal
|
||||||
|
///////////////////////////////////
|
||||||
|
|
||||||
}}
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||||
|
GaugeField &mat, const FermionField &A,
|
||||||
|
const FermionField &B, int dag) {
|
||||||
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
Compressor compressor(dag);
|
||||||
|
|
||||||
|
FermionField Btilde(B._grid);
|
||||||
|
FermionField Atilde(B._grid);
|
||||||
|
Atilde = A;
|
||||||
|
|
||||||
|
st.HaloExchange(B, compressor);
|
||||||
|
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// Flip gamma (1+g)<->(1-g) if dag
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
int gamma = mu;
|
||||||
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
|
////////////////////////
|
||||||
|
// Call the single hop
|
||||||
|
////////////////////////
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < B._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu,
|
||||||
|
gamma);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
// spin trace outer product
|
||||||
|
//////////////////////////////////////////////////
|
||||||
|
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag) {
|
||||||
|
conformable(U._grid, _grid);
|
||||||
|
conformable(U._grid, V._grid);
|
||||||
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
|
mat.checkerboard = U.checkerboard;
|
||||||
|
|
||||||
|
DerivInternal(Stencil, Umu, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag) {
|
||||||
|
conformable(U._grid, _cbgrid);
|
||||||
|
conformable(U._grid, V._grid);
|
||||||
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
|
assert(V.checkerboard == Even);
|
||||||
|
assert(U.checkerboard == Odd);
|
||||||
|
mat.checkerboard = Odd;
|
||||||
|
|
||||||
|
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U,
|
||||||
|
const FermionField &V, int dag) {
|
||||||
|
conformable(U._grid, _cbgrid);
|
||||||
|
conformable(U._grid, V._grid);
|
||||||
|
conformable(U._grid, mat._grid);
|
||||||
|
|
||||||
|
assert(V.checkerboard == Odd);
|
||||||
|
assert(U.checkerboard == Even);
|
||||||
|
mat.checkerboard = Even;
|
||||||
|
|
||||||
|
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,
|
||||||
|
int dag) {
|
||||||
|
conformable(in._grid, _grid); // verifies full grid
|
||||||
|
conformable(in._grid, out._grid);
|
||||||
|
|
||||||
|
out.checkerboard = in.checkerboard;
|
||||||
|
|
||||||
|
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,
|
||||||
|
int dag) {
|
||||||
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
|
assert(in.checkerboard == Even);
|
||||||
|
out.checkerboard = Odd;
|
||||||
|
|
||||||
|
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,
|
||||||
|
int dag) {
|
||||||
|
conformable(in._grid, _cbgrid); // verifies half grid
|
||||||
|
conformable(in._grid, out._grid); // drops the cb check
|
||||||
|
|
||||||
|
assert(in.checkerboard == Odd);
|
||||||
|
out.checkerboard = Even;
|
||||||
|
|
||||||
|
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out,
|
||||||
|
int dir, int disp) {
|
||||||
|
DhopDir(in, out, dir, disp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,
|
||||||
|
int dir, int disp) {
|
||||||
|
int skip = (disp == 1) ? 0 : 1;
|
||||||
|
int dirdisp = dir + skip * 4;
|
||||||
|
int gamma = dir + (1 - skip) * 4;
|
||||||
|
|
||||||
|
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||||
|
int dirdisp, int gamma, int dag) {
|
||||||
|
Compressor compressor(dag);
|
||||||
|
|
||||||
|
Stencil.HaloExchange(in, compressor);
|
||||||
|
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out,
|
||||||
|
dirdisp, gamma);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||||
|
DoubledGaugeField &U,
|
||||||
|
const FermionField &in,
|
||||||
|
FermionField &out, int dag) {
|
||||||
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
|
Compressor compressor(dag);
|
||||||
|
st.HaloExchange(in, compressor);
|
||||||
|
|
||||||
|
if (dag == DaggerYes) {
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
|
||||||
|
out);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||||
|
Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
|
||||||
|
out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
FermOpTemplateInstantiate(WilsonFermion);
|
||||||
|
AdjointFermOpTemplateInstantiate(WilsonFermion);
|
||||||
|
TwoIndexFermOpTemplateInstantiate(WilsonFermion);
|
||||||
|
GparityFermOpTemplateInstantiate(WilsonFermion);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,161 +1,154 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonFermion.h
|
Source file: ./lib/qcd/action/fermion/WilsonFermion.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_WILSON_FERMION_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_WILSON_FERMION_H
|
#ifndef GRID_QCD_WILSON_FERMION_H
|
||||||
|
#define GRID_QCD_WILSON_FERMION_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
class WilsonFermionStatic {
|
class WilsonFermionStatic {
|
||||||
public:
|
public:
|
||||||
static int HandOptDslash; // these are a temporary hack
|
static int HandOptDslash; // these are a temporary hack
|
||||||
static int MortonOrder;
|
static int MortonOrder;
|
||||||
static const std::vector<int> directions ;
|
static const std::vector<int> directions;
|
||||||
static const std::vector<int> displacements;
|
static const std::vector<int> displacements;
|
||||||
static const int npoint=8;
|
static const int npoint = 8;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic
|
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
||||||
{
|
public:
|
||||||
public:
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
typedef WilsonKernels<Impl> Kernels;
|
||||||
typedef WilsonKernels<Impl> Kernels;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
GridBase *GaugeGrid(void) { return _grid ;}
|
GridBase *GaugeGrid(void) { return _grid; }
|
||||||
GridBase *GaugeRedBlackGrid(void) { return _cbgrid ;}
|
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
|
||||||
GridBase *FermionGrid(void) { return _grid;}
|
GridBase *FermionGrid(void) { return _grid; }
|
||||||
GridBase *FermionRedBlackGrid(void) { return _cbgrid;}
|
GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
RealD M(const FermionField &in, FermionField &out);
|
RealD M(const FermionField &in, FermionField &out);
|
||||||
RealD Mdag(const FermionField &in, FermionField &out);
|
RealD Mdag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
// half checkerboard operations
|
// half checkerboard operations
|
||||||
// could remain virtual so we can derive Clover from Wilson base
|
// could remain virtual so we can derive Clover from Wilson base
|
||||||
/////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////
|
||||||
void Meooe(const FermionField &in, FermionField &out) ;
|
void Meooe(const FermionField &in, FermionField &out);
|
||||||
void MeooeDag(const FermionField &in, FermionField &out) ;
|
void MeooeDag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
// allow override for twisted mass and clover
|
// allow override for twisted mass and clover
|
||||||
virtual void Mooee(const FermionField &in, FermionField &out) ;
|
virtual void Mooee(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeDag(const FermionField &in, FermionField &out) ;
|
virtual void MooeeDag(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInv(const FermionField &in, FermionField &out) ;
|
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
||||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
|
virtual void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
////////////////////////
|
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass) ;
|
||||||
// Derivative interface
|
|
||||||
////////////////////////
|
////////////////////////
|
||||||
// Interface calls an internal routine
|
// Derivative interface
|
||||||
void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
////////////////////////
|
||||||
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
// Interface calls an internal routine
|
||||||
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// non-hermitian hopping term; half cb or both
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void Dhop(const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopOE(const FermionField &in, FermionField &out, int dag);
|
||||||
|
void DhopEO(const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Multigrid assistance; force term uses too
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
|
||||||
|
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
|
||||||
|
int gamma, int dag);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Extra methods added by derived
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat,
|
||||||
|
const FermionField &A, const FermionField &B, int dag);
|
||||||
|
|
||||||
|
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
|
const FermionField &in, FermionField &out, int dag);
|
||||||
|
|
||||||
|
// Constructor
|
||||||
|
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||||
|
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||||
|
const ImplParams &p = ImplParams());
|
||||||
|
|
||||||
|
// DoubleStore impl dependent
|
||||||
|
void ImportGauge(const GaugeField &_Umu);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Data members require to support the functionality
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// protected:
|
||||||
|
public:
|
||||||
|
RealD mass;
|
||||||
|
|
||||||
|
GridBase *_grid;
|
||||||
|
GridBase *_cbgrid;
|
||||||
|
|
||||||
|
// Defines the stencils for even and odd
|
||||||
|
StencilImpl Stencil;
|
||||||
|
StencilImpl StencilEven;
|
||||||
|
StencilImpl StencilOdd;
|
||||||
|
|
||||||
|
// Copy of the gauge field , with even and odd subsets
|
||||||
|
DoubledGaugeField Umu;
|
||||||
|
DoubledGaugeField UmuEven;
|
||||||
|
DoubledGaugeField UmuOdd;
|
||||||
|
|
||||||
|
LebesgueOrder Lebesgue;
|
||||||
|
LebesgueOrder LebesgueEvenOdd;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
||||||
|
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
}
|
||||||
// non-hermitian hopping term; half cb or both
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
void Dhop(const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
void DhopOE(const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
void DhopEO(const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Multigrid assistance; force term uses too
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ;
|
|
||||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
|
||||||
void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Extra methods added by derived
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
void DerivInternal(StencilImpl & st,
|
|
||||||
DoubledGaugeField & U,
|
|
||||||
GaugeField &mat,
|
|
||||||
const FermionField &A,
|
|
||||||
const FermionField &B,
|
|
||||||
int dag);
|
|
||||||
|
|
||||||
void DhopInternal(StencilImpl & st,LebesgueOrder & lo,DoubledGaugeField & U,
|
|
||||||
const FermionField &in, FermionField &out,int dag) ;
|
|
||||||
|
|
||||||
// Constructor
|
|
||||||
WilsonFermion(GaugeField &_Umu,
|
|
||||||
GridCartesian &Fgrid,
|
|
||||||
GridRedBlackCartesian &Hgrid,
|
|
||||||
RealD _mass,
|
|
||||||
const ImplParams &p= ImplParams()
|
|
||||||
) ;
|
|
||||||
|
|
||||||
// DoubleStore impl dependent
|
|
||||||
void ImportGauge(const GaugeField &_Umu);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Data members required to support the functionality
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// protected:
|
|
||||||
public:
|
|
||||||
|
|
||||||
RealD mass;
|
|
||||||
|
|
||||||
GridBase * _grid;
|
|
||||||
GridBase * _cbgrid;
|
|
||||||
|
|
||||||
//Defines the stencils for even and odd
|
|
||||||
StencilImpl Stencil;
|
|
||||||
StencilImpl StencilEven;
|
|
||||||
StencilImpl StencilOdd;
|
|
||||||
|
|
||||||
// Copy of the gauge field , with even and odd subsets
|
|
||||||
DoubledGaugeField Umu;
|
|
||||||
DoubledGaugeField UmuEven;
|
|
||||||
DoubledGaugeField UmuOdd;
|
|
||||||
|
|
||||||
LebesgueOrder Lebesgue;
|
|
||||||
LebesgueOrder LebesgueEvenOdd;
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
|
|
||||||
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -42,15 +42,15 @@ const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1
|
|||||||
// 5d lattice for DWF.
|
// 5d lattice for DWF.
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _M5,const ImplParams &p) :
|
RealD _M5,const ImplParams &p) :
|
||||||
Kernels(p),
|
Kernels(p),
|
||||||
_FiveDimGrid(&FiveDimGrid),
|
_FiveDimGrid (&FiveDimGrid),
|
||||||
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
||||||
_FourDimGrid(&FourDimGrid),
|
_FourDimGrid (&FourDimGrid),
|
||||||
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
_FourDimRedBlackGrid(&FourDimRedBlackGrid),
|
||||||
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
||||||
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
||||||
@ -62,60 +62,83 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|||||||
Lebesgue(_FourDimGrid),
|
Lebesgue(_FourDimGrid),
|
||||||
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
LebesgueEvenOdd(_FourDimRedBlackGrid)
|
||||||
{
|
{
|
||||||
// some assertions
|
if (Impl::LsVectorised) {
|
||||||
assert(FiveDimGrid._ndimension==5);
|
|
||||||
assert(FourDimGrid._ndimension==4);
|
|
||||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
|
||||||
assert(FourDimRedBlackGrid._ndimension==4);
|
|
||||||
assert(FiveDimRedBlackGrid._checker_dim==1);
|
|
||||||
|
|
||||||
// Dimension zero of the five-d is the Ls direction
|
int nsimd = Simd::Nsimd();
|
||||||
Ls=FiveDimGrid._fdimensions[0];
|
|
||||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
// some assertions
|
||||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
assert(FiveDimGrid._ndimension==5);
|
||||||
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||||
assert(FiveDimGrid._processors[0] ==1);
|
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
|
||||||
assert(FiveDimGrid._simd_layout[0] ==1);
|
assert(FourDimGrid._ndimension==4);
|
||||||
|
|
||||||
// Other dimensions must match the decomposition of the four-D fields
|
// Dimension zero of the five-d is the Ls direction
|
||||||
for(int d=0;d<4;d++){
|
Ls=FiveDimGrid._fdimensions[0];
|
||||||
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
assert(FiveDimGrid._processors[0] ==1);
|
||||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||||
|
|
||||||
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||||
|
|
||||||
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
// Other dimensions must match the decomposition of the four-D fields
|
||||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
for(int d=0;d<4;d++){
|
||||||
|
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
|
|
||||||
|
// Compare, do not assign: '=' here would overwrite the caller's SIMD layout and make the check always pass.
assert(FourDimGrid._simd_layout[d]==1);
|
||||||
|
assert(FourDimRedBlackGrid._simd_layout[d]==1);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||||
|
|
||||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// some assertions
|
||||||
|
assert(FiveDimGrid._ndimension==5);
|
||||||
|
assert(FourDimGrid._ndimension==4);
|
||||||
|
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||||
|
assert(FourDimRedBlackGrid._ndimension==4);
|
||||||
|
assert(FiveDimRedBlackGrid._checker_dim==1);
|
||||||
|
|
||||||
|
// Dimension zero of the five-d is the Ls direction
|
||||||
|
Ls=FiveDimGrid._fdimensions[0];
|
||||||
|
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||||
|
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
|
||||||
|
assert(FiveDimGrid._processors[0] ==1);
|
||||||
|
assert(FiveDimGrid._simd_layout[0] ==1);
|
||||||
|
|
||||||
|
// Other dimensions must match the decomposition of the four-D fields
|
||||||
|
for(int d=0;d<4;d++){
|
||||||
|
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||||
|
|
||||||
|
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
|
|
||||||
|
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
|
||||||
|
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
|
||||||
|
|
||||||
|
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||||
|
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||||
|
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate the required comms buffer
|
// Allocate the required comms buffer
|
||||||
ImportGauge(_Umu);
|
ImportGauge(_Umu);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
RealD _M5,const ImplParams &p) :
|
RealD _M5,const ImplParams &p) :
|
||||||
Kernels(p),
|
|
||||||
_FiveDimGrid (&FiveDimGrid),
|
|
||||||
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
|
||||||
_FourDimGrid (&FourDimGrid),
|
|
||||||
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
|
||||||
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
|
||||||
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
|
|
||||||
M5(_M5),
|
|
||||||
Umu(_FourDimGrid),
|
|
||||||
UmuEven(_FourDimGrid),
|
|
||||||
UmuOdd (_FourDimGrid),
|
|
||||||
Lebesgue(_FourDimGrid),
|
|
||||||
LebesgueEvenOdd(_FourDimGrid)
|
|
||||||
{
|
{
|
||||||
int nsimd = Simd::Nsimd();
|
int nsimd = Simd::Nsimd();
|
||||||
|
|
||||||
@ -148,13 +171,68 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
GaugeField HUmu(_Umu._grid);
|
|
||||||
HUmu = _Umu*(-0.5);
|
|
||||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
|
||||||
UmuEven=Umu;// Really want a reference.
|
|
||||||
UmuOdd =Umu;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::Report(void)
|
||||||
|
{
|
||||||
|
std::vector<int> latt = GridDefaultLatt();
|
||||||
|
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||||
|
RealD NP = _FourDimGrid->_Nprocessors;
|
||||||
|
|
||||||
|
if ( DhopCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime<< " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " << DhopComputeTime << " us" << std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||||
|
|
||||||
|
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( DerivCalls > 0 ) {
|
||||||
|
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||||
|
|
||||||
|
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||||
|
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DerivCalls > 0 || DhopCalls > 0){
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||||
|
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
||||||
|
DhopCalls = 0;
|
||||||
|
DhopCommTime = 0;
|
||||||
|
DhopComputeTime = 0;
|
||||||
|
|
||||||
|
DerivCalls = 0;
|
||||||
|
DerivCommTime = 0;
|
||||||
|
DerivComputeTime = 0;
|
||||||
|
DerivDhopComputeTime = 0;
|
||||||
|
|
||||||
|
Stencil.ZeroCounters();
|
||||||
|
StencilEven.ZeroCounters();
|
||||||
|
StencilOdd.ZeroCounters();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -190,19 +268,20 @@ PARALLEL_FOR_LOOP
|
|||||||
for(int s=0;s<Ls;s++){
|
for(int s=0;s<Ls;s++){
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
int sF = s+Ls*sU;
|
int sF = s+Ls*sU;
|
||||||
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sF,sU,in,out,dirdisp,gamma);
|
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
GaugeField &mat,
|
GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
|
DerivCalls++;
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
|
|
||||||
conformable(st._grid,A._grid);
|
conformable(st._grid,A._grid);
|
||||||
@ -213,51 +292,52 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
FermionField Btilde(B._grid);
|
FermionField Btilde(B._grid);
|
||||||
FermionField Atilde(B._grid);
|
FermionField Atilde(B._grid);
|
||||||
|
|
||||||
|
DerivCommTime-=usecond();
|
||||||
st.HaloExchange(B,compressor);
|
st.HaloExchange(B,compressor);
|
||||||
|
DerivCommTime+=usecond();
|
||||||
|
|
||||||
Atilde=A;
|
Atilde=A;
|
||||||
|
|
||||||
for(int mu=0;mu<Nd;mu++){
|
DerivComputeTime-=usecond();
|
||||||
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Flip gamma if dag
|
// Flip gamma if dag
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
int gamma = mu;
|
int gamma = mu;
|
||||||
if ( !dag ) gamma+= Nd;
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
////////////////////////
|
////////////////////////
|
||||||
// Call the single hop
|
// Call the single hop
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
DerivDhopComputeTime -= usecond();
|
||||||
for(int sss=0;sss<U._grid->oSites();sss++){
|
PARALLEL_FOR_LOOP
|
||||||
for(int s=0;s<Ls;s++){
|
for (int sss = 0; sss < U._grid->oSites(); sss++) {
|
||||||
int sU=sss;
|
for (int s = 0; s < Ls; s++) {
|
||||||
int sF = s+Ls*sU;
|
int sU = sss;
|
||||||
|
int sF = s + Ls * sU;
|
||||||
|
|
||||||
assert ( sF< B._grid->oSites());
|
assert(sF < B._grid->oSites());
|
||||||
assert ( sU< U._grid->oSites());
|
assert(sU < U._grid->oSites());
|
||||||
|
|
||||||
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
|
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
|
||||||
|
|
||||||
////////////////////////////
|
|
||||||
// spin trace outer product
|
|
||||||
////////////////////////////
|
|
||||||
|
|
||||||
|
////////////////////////////
|
||||||
|
// spin trace outer product
|
||||||
|
////////////////////////////
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
DerivDhopComputeTime += usecond();
|
||||||
Impl::InsertForce5D(mat,Btilde,Atilde,mu);
|
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
DerivComputeTime += usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
conformable(A._grid,FermionGrid());
|
conformable(A._grid,FermionGrid());
|
||||||
conformable(A._grid,B._grid);
|
conformable(A._grid,B._grid);
|
||||||
@ -288,9 +368,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
|||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
conformable(A._grid,FermionRedBlackGrid());
|
conformable(A._grid,FermionRedBlackGrid());
|
||||||
conformable(GaugeRedBlackGrid(),mat._grid);
|
conformable(GaugeRedBlackGrid(),mat._grid);
|
||||||
@ -313,30 +393,56 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|||||||
|
|
||||||
int LLs = in._grid->_rdimensions[0];
|
int LLs = in._grid->_rdimensions[0];
|
||||||
|
|
||||||
|
DhopCommTime-=usecond();
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
|
DhopCommTime+=usecond();
|
||||||
|
|
||||||
|
DhopComputeTime-=usecond();
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
if ( dag == DaggerYes ) {
|
if (dag == DaggerYes) {
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
int sU=ss;
|
int sU = ss;
|
||||||
int sF=LLs*sU;
|
int sF = LLs * sU;
|
||||||
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sF, sU, LLs, 1, in, out);
|
||||||
}
|
}
|
||||||
} else {
|
#ifdef AVX512
|
||||||
PARALLEL_FOR_LOOP
|
} else if (stat.is_init() ) {
|
||||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
|
||||||
|
int nthreads;
|
||||||
|
stat.start();
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp master
|
||||||
|
nthreads = omp_get_num_threads();
|
||||||
|
int mythread = omp_get_thread_num();
|
||||||
|
stat.enter(mythread);
|
||||||
|
#pragma omp for nowait
|
||||||
|
for(int ss=0;ss<U._grid->oSites();ss++) {
|
||||||
int sU=ss;
|
int sU=ss;
|
||||||
int sF=LLs*sU;
|
int sF=LLs*sU;
|
||||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||||
|
}
|
||||||
|
stat.exit(mythread);
|
||||||
|
}
|
||||||
|
stat.accum(nthreads);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
PARALLEL_FOR_LOOP
|
||||||
|
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||||
|
int sU = ss;
|
||||||
|
int sF = LLs * sU;
|
||||||
|
Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DhopComputeTime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
|
DhopCalls++;
|
||||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
conformable(in._grid,out._grid); // drops the cb check
|
||||||
|
|
||||||
@ -348,6 +454,7 @@ void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
|
DhopCalls++;
|
||||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in._grid,out._grid); // drops the cb check
|
conformable(in._grid,out._grid); // drops the cb check
|
||||||
|
|
||||||
@ -359,6 +466,7 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
|
DhopCalls+=2;
|
||||||
conformable(in._grid,FermionGrid()); // verifies full grid
|
conformable(in._grid,FermionGrid()); // verifies full grid
|
||||||
conformable(in._grid,out._grid);
|
conformable(in._grid,out._grid);
|
||||||
|
|
||||||
@ -374,10 +482,150 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
|
|||||||
axpy(out,4.0-M5,in,out);
|
axpy(out,4.0-M5,in,out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const FermionField &in, RealD mass)
|
||||||
|
{
|
||||||
|
// what type LatticeComplex
|
||||||
|
GridBase *_grid = _FourDimGrid;
|
||||||
|
conformable(_grid,out._grid);
|
||||||
|
|
||||||
|
typedef typename FermionField::vector_type vector_type;
|
||||||
|
typedef typename FermionField::scalar_type ScalComplex;
|
||||||
|
typedef iSinglet<ScalComplex> Tcomplex;
|
||||||
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||||
|
|
||||||
|
Gamma::GammaMatrix Gmu [] = {
|
||||||
|
Gamma::GammaX,
|
||||||
|
Gamma::GammaY,
|
||||||
|
Gamma::GammaZ,
|
||||||
|
Gamma::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<int> latt_size = _grid->_fdimensions;
|
||||||
|
|
||||||
|
|
||||||
|
FermionField num (_grid); num = zero;
|
||||||
|
|
||||||
|
LatComplex sk(_grid); sk = zero;
|
||||||
|
LatComplex sk2(_grid); sk2= zero;
|
||||||
|
LatComplex W(_grid); W= zero;
|
||||||
|
LatComplex a(_grid); a= zero;
|
||||||
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||||
|
LatComplex denom(_grid); denom= zero;
|
||||||
|
LatComplex cosha(_grid);
|
||||||
|
LatComplex kmu(_grid);
|
||||||
|
LatComplex Wea(_grid);
|
||||||
|
LatComplex Wema(_grid);
|
||||||
|
|
||||||
|
ScalComplex ci(0.0,1.0);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++) {
|
||||||
|
|
||||||
|
LatticeCoordinate(kmu,mu);
|
||||||
|
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
|
||||||
|
kmu = TwoPiL * kmu;
|
||||||
|
|
||||||
|
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
||||||
|
sk = sk + sin(kmu) *sin(kmu);
|
||||||
|
|
||||||
|
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
W = one - M5 + sk2;
|
||||||
|
|
||||||
|
////////////////////////////////////////////
|
||||||
|
// Cosh alpha -> alpha
|
||||||
|
////////////////////////////////////////////
|
||||||
|
cosha = (one + W*W + sk) / (W*2.0);
|
||||||
|
|
||||||
|
// FIXME Need a Lattice acosh
|
||||||
|
for(int idx=0;idx<_grid->lSites();idx++){
|
||||||
|
std::vector<int> lcoor(Nd);
|
||||||
|
Tcomplex cc;
|
||||||
|
RealD sgn;
|
||||||
|
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||||
|
peekLocalSite(cc,cosha,lcoor);
|
||||||
|
assert((double)real(cc)>=1.0);
|
||||||
|
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||||
|
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||||
|
pokeLocalSite(cc,a,lcoor);
|
||||||
|
}
|
||||||
|
|
||||||
|
Wea = ( exp( a) * W );
|
||||||
|
Wema= ( exp(-a) * W );
|
||||||
|
|
||||||
|
num = num + ( one - Wema ) * mass * in;
|
||||||
|
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
||||||
|
out = num/denom;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Impl>
|
||||||
|
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass)
|
||||||
|
{
|
||||||
|
Gamma::GammaMatrix Gmu [] = {
|
||||||
|
Gamma::GammaX,
|
||||||
|
Gamma::GammaY,
|
||||||
|
Gamma::GammaZ,
|
||||||
|
Gamma::GammaT
|
||||||
|
};
|
||||||
|
|
||||||
|
GridBase *_grid = _FourDimGrid;
|
||||||
|
conformable(_grid,out._grid);
|
||||||
|
|
||||||
|
typedef typename FermionField::vector_type vector_type;
|
||||||
|
typedef typename FermionField::scalar_type ScalComplex;
|
||||||
|
|
||||||
|
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<int> latt_size = _grid->_fdimensions;
|
||||||
|
|
||||||
|
LatComplex sk(_grid); sk = zero;
|
||||||
|
LatComplex sk2(_grid); sk2= zero;
|
||||||
|
|
||||||
|
LatComplex w_k(_grid); w_k= zero;
|
||||||
|
LatComplex b_k(_grid); b_k= zero;
|
||||||
|
|
||||||
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||||
|
|
||||||
|
FermionField num (_grid); num = zero;
|
||||||
|
LatComplex denom(_grid); denom= zero;
|
||||||
|
LatComplex kmu(_grid);
|
||||||
|
ScalComplex ci(0.0,1.0);
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++) {
|
||||||
|
|
||||||
|
LatticeCoordinate(kmu,mu);
|
||||||
|
|
||||||
|
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
|
||||||
|
kmu = TwoPiL * kmu;
|
||||||
|
|
||||||
|
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
||||||
|
sk = sk + sin(kmu)*sin(kmu);
|
||||||
|
|
||||||
|
num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*in);
|
||||||
|
|
||||||
|
}
|
||||||
|
num = num + mass * in ;
|
||||||
|
|
||||||
|
b_k = sk2 - M5;
|
||||||
|
|
||||||
|
w_k = sqrt(sk + b_k*b_k);
|
||||||
|
|
||||||
|
denom= ( w_k + b_k + mass*mass) ;
|
||||||
|
|
||||||
|
denom= one/denom;
|
||||||
|
out = num*denom;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
FermOpTemplateInstantiate(WilsonFermion5D);
|
FermOpTemplateInstantiate(WilsonFermion5D);
|
||||||
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
||||||
template class WilsonFermion5D<DomainWallRedBlack5dImplF>;
|
|
||||||
template class WilsonFermion5D<DomainWallRedBlack5dImplD>;
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
@ -31,9 +31,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_QCD_WILSON_FERMION_5D_H
|
#ifndef GRID_QCD_WILSON_FERMION_5D_H
|
||||||
#define GRID_QCD_WILSON_FERMION_5D_H
|
#define GRID_QCD_WILSON_FERMION_5D_H
|
||||||
|
|
||||||
namespace Grid {
|
#include <Grid/Stat.h>
|
||||||
|
|
||||||
namespace QCD {
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// This is the 4d red black case appropriate to support
|
||||||
|
//
|
||||||
|
// parity = (x+y+z+t)|2;
|
||||||
|
// generalised five dim fermions like mobius, zolotarev etc..
|
||||||
|
//
|
||||||
|
// i.e. even even contains fifth dim hopping term.
|
||||||
|
//
|
||||||
|
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// This is the 4d red black case appropriate to support
|
// This is the 4d red black case appropriate to support
|
||||||
@ -60,6 +72,18 @@ namespace Grid {
|
|||||||
public:
|
public:
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
typedef WilsonKernels<Impl> Kernels;
|
typedef WilsonKernels<Impl> Kernels;
|
||||||
|
PmuStat stat;
|
||||||
|
|
||||||
|
void Report(void);
|
||||||
|
void ZeroCounters(void);
|
||||||
|
double DhopCalls;
|
||||||
|
double DhopCommTime;
|
||||||
|
double DhopComputeTime;
|
||||||
|
|
||||||
|
double DerivCalls;
|
||||||
|
double DerivCommTime;
|
||||||
|
double DerivComputeTime;
|
||||||
|
double DerivDhopComputeTime;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
@ -88,6 +112,9 @@ namespace Grid {
|
|||||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||||
|
|
||||||
|
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass) ;
|
||||||
|
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass) ;
|
||||||
|
|
||||||
// Implement hopping term non-hermitian hopping term; half cb or both
|
// Implement hopping term non-hermitian hopping term; half cb or both
|
||||||
// Implement s-diagonal DW
|
// Implement s-diagonal DW
|
||||||
void DW (const FermionField &in, FermionField &out,int dag);
|
void DW (const FermionField &in, FermionField &out,int dag);
|
||||||
@ -97,76 +124,78 @@ namespace Grid {
|
|||||||
|
|
||||||
// add a DhopComm
|
// add a DhopComm
|
||||||
// -- suboptimal interface will presently trigger multiple comms.
|
// -- suboptimal interface will presently trigger multiple comms.
|
||||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// New methods added
|
// New methods added
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
void DerivInternal(StencilImpl & st,
|
void DerivInternal(StencilImpl & st,
|
||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
GaugeField &mat,
|
GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag);
|
int dag);
|
||||||
|
|
||||||
void DhopInternal(StencilImpl & st,
|
void DhopInternal(StencilImpl & st,
|
||||||
LebesgueOrder &lo,
|
LebesgueOrder &lo,
|
||||||
DoubledGaugeField &U,
|
DoubledGaugeField &U,
|
||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out,
|
FermionField &out,
|
||||||
int dag);
|
int dag);
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
WilsonFermion5D(GaugeField &_Umu,
|
WilsonFermion5D(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
double _M5,const ImplParams &p= ImplParams());
|
double _M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
|
/*
|
||||||
WilsonFermion5D(int simd,
|
WilsonFermion5D(int simd,
|
||||||
GaugeField &_Umu,
|
GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||||
GridCartesian &FourDimGrid,
|
GridCartesian &FourDimGrid,
|
||||||
double _M5,const ImplParams &p= ImplParams());
|
double _M5,const ImplParams &p= ImplParams());
|
||||||
|
*/
|
||||||
|
|
||||||
|
// DoubleStore
|
||||||
|
void ImportGauge(const GaugeField &_Umu);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
// Data members require to support the functionality
|
||||||
|
///////////////////////////////////////////////////////////////
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Add these to the support from Wilson
|
||||||
|
GridBase *_FourDimGrid;
|
||||||
|
GridBase *_FourDimRedBlackGrid;
|
||||||
|
GridBase *_FiveDimGrid;
|
||||||
|
GridBase *_FiveDimRedBlackGrid;
|
||||||
|
|
||||||
|
double M5;
|
||||||
|
int Ls;
|
||||||
|
|
||||||
|
//Defines the stencils for even and odd
|
||||||
|
StencilImpl Stencil;
|
||||||
|
StencilImpl StencilEven;
|
||||||
|
StencilImpl StencilOdd;
|
||||||
|
|
||||||
|
// Copy of the gauge field , with even and odd subsets
|
||||||
|
DoubledGaugeField Umu;
|
||||||
|
DoubledGaugeField UmuEven;
|
||||||
|
DoubledGaugeField UmuOdd;
|
||||||
|
|
||||||
|
LebesgueOrder Lebesgue;
|
||||||
|
LebesgueOrder LebesgueEvenOdd;
|
||||||
|
|
||||||
|
// Comms buffer
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
// DoubleStore
|
}}
|
||||||
void ImportGauge(const GaugeField &_Umu);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
// Data members require to support the functionality
|
|
||||||
///////////////////////////////////////////////////////////////
|
|
||||||
public:
|
|
||||||
|
|
||||||
// Add these to the support from Wilson
|
|
||||||
GridBase *_FourDimGrid;
|
|
||||||
GridBase *_FourDimRedBlackGrid;
|
|
||||||
GridBase *_FiveDimGrid;
|
|
||||||
GridBase *_FiveDimRedBlackGrid;
|
|
||||||
|
|
||||||
double M5;
|
|
||||||
int Ls;
|
|
||||||
|
|
||||||
//Defines the stencils for even and odd
|
|
||||||
StencilImpl Stencil;
|
|
||||||
StencilImpl StencilEven;
|
|
||||||
StencilImpl StencilOdd;
|
|
||||||
|
|
||||||
// Copy of the gauge field , with even and odd subsets
|
|
||||||
DoubledGaugeField Umu;
|
|
||||||
DoubledGaugeField UmuEven;
|
|
||||||
DoubledGaugeField UmuOdd;
|
|
||||||
|
|
||||||
LebesgueOrder Lebesgue;
|
|
||||||
LebesgueOrder LebesgueEvenOdd;
|
|
||||||
|
|
||||||
// Comms buffer
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
|
||||||
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,98 +1,52 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
#include <Grid.h>
|
#include <Grid.h>
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
int WilsonKernelsStatic::HandOpt;
|
int WilsonKernelsStatic::Opt;
|
||||||
int WilsonKernelsStatic::AsmOpt;
|
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
|
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||||
|
|
||||||
template<class Impl>
|
////////////////////////////////////////////
|
||||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
// Generic implementation; move to different file?
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
////////////////////////////////////////////
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
#ifdef AVX512
|
|
||||||
if ( AsmOpt ) {
|
|
||||||
|
|
||||||
WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
template <class Impl>
|
||||||
|
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
} else {
|
SiteHalfSpinor *buf, int sF,
|
||||||
#else
|
int sU, const FermionField &in, FermionField &out) {
|
||||||
{
|
SiteHalfSpinor tmp;
|
||||||
#endif
|
SiteHalfSpinor chi;
|
||||||
for(int site=0;site<Ns;site++) {
|
|
||||||
for(int s=0;s<Ls;s++) {
|
|
||||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
else WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
// No asm implementation yet.
|
|
||||||
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
// else
|
|
||||||
for(int site=0;site<Ns;site++) {
|
|
||||||
for(int s=0;s<Ls;s++) {
|
|
||||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
|
||||||
sF++;
|
|
||||||
}
|
|
||||||
sU++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////
|
|
||||||
// Generic implementation; move to different file?
|
|
||||||
////////////////////////////////////////////
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out)
|
|
||||||
{
|
|
||||||
SiteHalfSpinor tmp;
|
|
||||||
SiteHalfSpinor chi;
|
|
||||||
SiteHalfSpinor *chi_p;
|
SiteHalfSpinor *chi_p;
|
||||||
SiteHalfSpinor Uchi;
|
SiteHalfSpinor Uchi;
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
@ -102,176 +56,174 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrd
|
|||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xp
|
// Xp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xp,sF);
|
SE = st.GetEntry(ptype, Xp, sF);
|
||||||
|
|
||||||
if (SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXp(tmp,in._odata[SE->_offset]);
|
spProjXp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXp(chi,in._odata[SE->_offset]);
|
spProjXp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
|
||||||
spReconXp(result,Uchi);
|
spReconXp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Yp,sF);
|
SE = st.GetEntry(ptype, Yp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYp(tmp,in._odata[SE->_offset]);
|
spProjYp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYp(chi,in._odata[SE->_offset]);
|
spProjYp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
|
||||||
accumReconYp(result,Uchi);
|
accumReconYp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zp,sF);
|
SE = st.GetEntry(ptype, Zp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZp(tmp,in._odata[SE->_offset]);
|
spProjZp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZp(chi,in._odata[SE->_offset]);
|
spProjZp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
|
||||||
accumReconZp(result,Uchi);
|
accumReconZp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tp,sF);
|
SE = st.GetEntry(ptype, Tp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTp(tmp,in._odata[SE->_offset]);
|
spProjTp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTp(chi,in._odata[SE->_offset]);
|
spProjTp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
|
||||||
accumReconTp(result,Uchi);
|
accumReconTp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xm,sF);
|
SE = st.GetEntry(ptype, Xm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXm(tmp,in._odata[SE->_offset]);
|
spProjXm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXm(chi,in._odata[SE->_offset]);
|
spProjXm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
|
||||||
accumReconXm(result,Uchi);
|
accumReconXm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Ym,sF);
|
SE = st.GetEntry(ptype, Ym, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYm(tmp,in._odata[SE->_offset]);
|
spProjYm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYm(chi,in._odata[SE->_offset]);
|
spProjYm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
|
||||||
accumReconYm(result,Uchi);
|
accumReconYm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zm,sF);
|
SE = st.GetEntry(ptype, Zm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZm(tmp,in._odata[SE->_offset]);
|
spProjZm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZm(chi,in._odata[SE->_offset]);
|
spProjZm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
|
||||||
accumReconZm(result,Uchi);
|
accumReconZm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tm,sF);
|
SE = st.GetEntry(ptype, Tm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTm(tmp,in._odata[SE->_offset]);
|
spProjTm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTm(chi,in._odata[SE->_offset]);
|
spProjTm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
|
||||||
accumReconTm(result,Uchi);
|
accumReconTm(result, Uchi);
|
||||||
|
|
||||||
vstream(out._odata[sF],result);
|
vstream(out._odata[sF], result);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Need controls to do interior, exterior, or both
|
||||||
// Need controls to do interior, exterior, or both
|
template <class Impl>
|
||||||
template<class Impl>
|
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
SiteHalfSpinor *buf, int sF,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
int sU, const FermionField &in, FermionField &out) {
|
||||||
int sF,int sU,const FermionField &in, FermionField &out)
|
SiteHalfSpinor tmp;
|
||||||
{
|
SiteHalfSpinor chi;
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor *chi_p;
|
||||||
SiteHalfSpinor chi;
|
|
||||||
SiteHalfSpinor *chi_p;
|
|
||||||
SiteHalfSpinor Uchi;
|
SiteHalfSpinor Uchi;
|
||||||
SiteSpinor result;
|
SiteSpinor result;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
@ -280,299 +232,297 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder
|
|||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xp
|
// Xp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xm,sF);
|
SE = st.GetEntry(ptype, Xm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXp(tmp,in._odata[SE->_offset]);
|
spProjXp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXp(chi,in._odata[SE->_offset]);
|
spProjXp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
|
||||||
spReconXp(result,Uchi);
|
spReconXp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Yp
|
// Yp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Ym,sF);
|
SE = st.GetEntry(ptype, Ym, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYp(tmp,in._odata[SE->_offset]);
|
spProjYp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYp(chi,in._odata[SE->_offset]);
|
spProjYp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
|
||||||
accumReconYp(result,Uchi);
|
accumReconYp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zp
|
// Zp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zm,sF);
|
SE = st.GetEntry(ptype, Zm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZp(tmp,in._odata[SE->_offset]);
|
spProjZp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZp(chi,in._odata[SE->_offset]);
|
spProjZp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
|
||||||
accumReconZp(result,Uchi);
|
accumReconZp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tp
|
// Tp
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tm,sF);
|
SE = st.GetEntry(ptype, Tm, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTp(tmp,in._odata[SE->_offset]);
|
spProjTp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTp(chi,in._odata[SE->_offset]);
|
spProjTp(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
|
||||||
accumReconTp(result,Uchi);
|
accumReconTp(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Xm
|
// Xm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Xp,sF);
|
SE = st.GetEntry(ptype, Xp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjXm(tmp,in._odata[SE->_offset]);
|
spProjXm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjXm(chi,in._odata[SE->_offset]);
|
spProjXm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
|
||||||
accumReconXm(result,Uchi);
|
accumReconXm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Ym
|
// Ym
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Yp,sF);
|
SE = st.GetEntry(ptype, Yp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjYm(tmp,in._odata[SE->_offset]);
|
spProjYm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjYm(chi,in._odata[SE->_offset]);
|
spProjYm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
|
||||||
accumReconYm(result,Uchi);
|
accumReconYm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Zm
|
// Zm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Zp,sF);
|
SE = st.GetEntry(ptype, Zp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjZm(tmp,in._odata[SE->_offset]);
|
spProjZm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjZm(chi,in._odata[SE->_offset]);
|
spProjZm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
|
||||||
accumReconZm(result,Uchi);
|
accumReconZm(result, Uchi);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Tm
|
// Tm
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
SE=st.GetEntry(ptype,Tp,sF);
|
SE = st.GetEntry(ptype, Tp, sF);
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
if (SE->_is_local) {
|
||||||
chi_p = &chi;
|
chi_p = &chi;
|
||||||
if ( SE->_permute ) {
|
if (SE->_permute) {
|
||||||
spProjTm(tmp,in._odata[SE->_offset]);
|
spProjTm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else {
|
} else {
|
||||||
spProjTm(chi,in._odata[SE->_offset]);
|
spProjTm(chi, in._odata[SE->_offset]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
chi_p=&buf[SE->_offset];
|
chi_p = &buf[SE->_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
|
||||||
accumReconTm(result,Uchi);
|
accumReconTm(result, Uchi);
|
||||||
|
|
||||||
vstream(out._odata[sF],result);
|
vstream(out._odata[sF], result);
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template <class Impl>
|
||||||
void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
|
void WilsonKernels<Impl>::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
||||||
int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma)
|
|
||||||
{
|
SiteHalfSpinor tmp;
|
||||||
SiteHalfSpinor tmp;
|
SiteHalfSpinor chi;
|
||||||
SiteHalfSpinor chi;
|
SiteSpinor result;
|
||||||
SiteSpinor result;
|
|
||||||
SiteHalfSpinor Uchi;
|
SiteHalfSpinor Uchi;
|
||||||
StencilEntry *SE;
|
StencilEntry *SE;
|
||||||
int ptype;
|
int ptype;
|
||||||
|
|
||||||
SE=st.GetEntry(ptype,dir,sF);
|
SE = st.GetEntry(ptype, dir, sF);
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
if(gamma==Xp){
|
if (gamma == Xp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjXp(tmp,in._odata[SE->_offset]);
|
spProjXp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjXp(chi,in._odata[SE->_offset]);
|
spProjXp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconXp(result,Uchi);
|
spReconXp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Yp
|
// Yp
|
||||||
if ( gamma==Yp ){
|
if (gamma == Yp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjYp(tmp,in._odata[SE->_offset]);
|
spProjYp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjYp(chi,in._odata[SE->_offset]);
|
spProjYp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconYp(result,Uchi);
|
spReconYp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
if ( gamma ==Zp ){
|
if (gamma == Zp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjZp(tmp,in._odata[SE->_offset]);
|
spProjZp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjZp(chi,in._odata[SE->_offset]);
|
spProjZp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconZp(result,Uchi);
|
spReconZp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
if ( gamma ==Tp ){
|
if (gamma == Tp) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjTp(tmp,in._odata[SE->_offset]);
|
spProjTp(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjTp(chi,in._odata[SE->_offset]);
|
spProjTp(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconTp(result,Uchi);
|
spReconTp(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
if ( gamma==Xm ){
|
if (gamma == Xm) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjXm(tmp,in._odata[SE->_offset]);
|
spProjXm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjXm(chi,in._odata[SE->_offset]);
|
spProjXm(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconXm(result,Uchi);
|
spReconXm(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
if ( gamma == Ym ){
|
if (gamma == Ym) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjYm(tmp,in._odata[SE->_offset]);
|
spProjYm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjYm(chi,in._odata[SE->_offset]);
|
spProjYm(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconYm(result,Uchi);
|
spReconYm(result, Uchi);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
if ( gamma == Zm ){
|
if (gamma == Zm) {
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
if (SE->_is_local && SE->_permute) {
|
||||||
spProjZm(tmp,in._odata[SE->_offset]);
|
spProjZm(tmp, in._odata[SE->_offset]);
|
||||||
permute(chi,tmp,ptype);
|
permute(chi, tmp, ptype);
|
||||||
} else if ( SE->_is_local ) {
|
} else if (SE->_is_local) {
|
||||||
spProjZm(chi,in._odata[SE->_offset]);
|
spProjZm(chi, in._odata[SE->_offset]);
|
||||||
} else {
|
} else {
|
||||||
chi=buf[SE->_offset];
|
chi = buf[SE->_offset];
|
||||||
}
|
}
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
spReconZm(result,Uchi);
|
spReconZm(result, Uchi);
|
||||||
}
|
|
||||||
|
|
||||||
// Tm
|
|
||||||
if ( gamma==Tm ) {
|
|
||||||
if ( SE->_is_local && SE->_permute ) {
|
|
||||||
spProjTm(tmp,in._odata[SE->_offset]);
|
|
||||||
permute(chi,tmp,ptype);
|
|
||||||
} else if ( SE->_is_local ) {
|
|
||||||
spProjTm(chi,in._odata[SE->_offset]);
|
|
||||||
} else {
|
|
||||||
chi=buf[SE->_offset];
|
|
||||||
}
|
|
||||||
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
|
|
||||||
spReconTm(result,Uchi);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vstream(out._odata[sF],result);
|
// Tm
|
||||||
|
if (gamma == Tm) {
|
||||||
|
if (SE->_is_local && SE->_permute) {
|
||||||
|
spProjTm(tmp, in._odata[SE->_offset]);
|
||||||
|
permute(chi, tmp, ptype);
|
||||||
|
} else if (SE->_is_local) {
|
||||||
|
spProjTm(chi, in._odata[SE->_offset]);
|
||||||
|
} else {
|
||||||
|
chi = buf[SE->_offset];
|
||||||
|
}
|
||||||
|
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
|
||||||
|
spReconTm(result, Uchi);
|
||||||
|
}
|
||||||
|
|
||||||
|
vstream(out._odata[sF], result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FermOpTemplateInstantiate(WilsonKernels);
|
||||||
FermOpTemplateInstantiate(WilsonKernels);
|
AdjointFermOpTemplateInstantiate(WilsonKernels);
|
||||||
|
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
|
||||||
template class WilsonKernels<DomainWallRedBlack5dImplF>;
|
|
||||||
template class WilsonKernels<DomainWallRedBlack5dImplD>;
|
|
||||||
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
@ -1,98 +1,183 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
|
Source file: ./lib/qcd/action/fermion/WilsonKernels.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
*************************************************************************************/
|
directory
|
||||||
/* END LEGAL */
|
*************************************************************************************/
|
||||||
#ifndef GRID_QCD_DHOP_H
|
/* END LEGAL */
|
||||||
#define GRID_QCD_DHOP_H
|
#ifndef GRID_QCD_DHOP_H
|
||||||
|
#define GRID_QCD_DHOP_H
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
|
namespace QCD {
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Helper routines that implement Wilson stencil for a single site.
|
||||||
|
// Common to both the WilsonFermion and WilsonFermion5D
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
class WilsonKernelsStatic {
|
||||||
|
public:
|
||||||
|
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
|
||||||
|
// S-direction is INNERMOST and takes no part in the parity.
|
||||||
|
static int Opt; // these are a temporary hack
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
||||||
|
public:
|
||||||
|
|
||||||
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
typedef FermionOperator<Impl> Base;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||||
|
DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
|
switch(Opt) {
|
||||||
|
#ifdef AVX512
|
||||||
|
case OptInlineAsm:
|
||||||
|
WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case OptHandUnroll:
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OptGeneric:
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||||
|
DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||||
|
// no kernel choice
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
||||||
|
DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||||
|
|
||||||
|
switch(Opt) {
|
||||||
|
#ifdef AVX512
|
||||||
|
case OptInlineAsm:
|
||||||
|
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case OptHandUnroll:
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OptGeneric:
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool EnableBool = true>
|
||||||
|
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
||||||
|
DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||||
|
|
||||||
|
for (int site = 0; site < Ns; site++) {
|
||||||
|
for (int s = 0; s < Ls; s++) {
|
||||||
|
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||||
|
sF++;
|
||||||
|
}
|
||||||
|
sU++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DiracOptDhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Specialised variants
|
||||||
|
void DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptAsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptAsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptHandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptHandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
WilsonKernels(const ImplParams &p = ImplParams());
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}}
|
||||||
|
|
||||||
namespace QCD {
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Helper routines that implement Wilson stencil for a single site.
|
|
||||||
// Common to both the WilsonFermion and WilsonFermion5D
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
class WilsonKernelsStatic {
|
|
||||||
public:
|
|
||||||
// S-direction is INNERMOST and takes no part in the parity.
|
|
||||||
static int AsmOpt; // these are a temporary hack
|
|
||||||
static int HandOpt; // these are a temporary hack
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
|
||||||
public:
|
|
||||||
|
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
|
||||||
typedef FermionOperator<Impl> Base;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
void DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Specialised variants
|
|
||||||
void DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in,FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
|
|
||||||
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out);
|
|
||||||
|
|
||||||
void DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
||||||
int sF,int sU,const FermionField &in, FermionField &out);
|
|
||||||
public:
|
|
||||||
|
|
||||||
WilsonKernels(const ImplParams &p= ImplParams());
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user